[Patch ARM Refactor Builtins 2/8] Move Processor flags to arm-protos.h
[official-gcc.git] / gcc / config / arm / arm.c
blob 9aa978be7e1020151cd100a9e00be955a610ec11
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
 93 struct four_ints
 94 {
 95 int i[4];
 96 };
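/* Descriptive note (added for clarity, not in the original source): a
   four_ints holds the up-to-four immediate constants that
   optimal_immediate_sequence, declared below, fills in when it works out
   how to synthesize a constant in at most four instructions.  */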
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
317 /* Table of machine attributes. */
 318 static const struct attribute_spec arm_attribute_table[] =
 319 {
320 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
321 affects_type_identity } */
322 /* Function calls made to this symbol must be done indirectly, because
323 it may lie outside of the 26 bit addressing range of a normal function
324 call. */
325 { "long_call", 0, 0, false, true, true, NULL, false },
326 /* Whereas these functions are always known to reside within the 26 bit
327 addressing range. */
328 { "short_call", 0, 0, false, true, true, NULL, false },
329 /* Specify the procedure call conventions for a function. */
330 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
331 false },
332 /* Interrupt Service Routines have special prologue and epilogue requirements. */
333 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
336 false },
337 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
338 false },
339 #ifdef ARM_PE
340 /* ARM/PE has three new attributes:
341 interfacearm - ?
342 dllexport - for exporting a function/variable that will live in a dll
343 dllimport - for importing a function/variable from a dll
345 Microsoft allows multiple declspecs in one __declspec, separating
346 them with spaces. We do NOT support this. Instead, use __declspec
 347 multiple times.
 348 */
349 { "dllimport", 0, 0, true, false, false, NULL, false },
350 { "dllexport", 0, 0, true, false, false, NULL, false },
351 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
352 false },
353 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
354 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
355 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
357 false },
358 #endif
 359 { NULL, 0, 0, false, false, false, NULL, false }
 360 };
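/* For illustration, user code targeting ARM can request the attributes
   handled above like this (example declarations only):

     void far_away (void) __attribute__ ((long_call));
     void close_by (void) __attribute__ ((short_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void raw_entry (void) __attribute__ ((naked));

   "long_call" forces an indirect call sequence, "interrupt"/"isr" selects
   the ISR prologue/epilogue validated by arm_handle_isr_attribute, and
   "naked" suppresses prologue/epilogue generation entirely.  */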
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
366 #endif
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371 #undef TARGET_LRA_P
372 #define TARGET_LRA_P arm_lra_p
374 #undef TARGET_ATTRIBUTE_TABLE
375 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403 #undef TARGET_OPTION_OVERRIDE
404 #define TARGET_OPTION_OVERRIDE arm_option_override
406 #undef TARGET_COMP_TYPE_ATTRIBUTES
407 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
409 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
410 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
412 #undef TARGET_SCHED_ADJUST_COST
413 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
415 #undef TARGET_SCHED_REORDER
416 #define TARGET_SCHED_REORDER arm_sched_reorder
418 #undef TARGET_REGISTER_MOVE_COST
419 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
421 #undef TARGET_MEMORY_MOVE_COST
422 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
424 #undef TARGET_ENCODE_SECTION_INFO
425 #ifdef ARM_PE
426 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
427 #else
428 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
429 #endif
431 #undef TARGET_STRIP_NAME_ENCODING
432 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
434 #undef TARGET_ASM_INTERNAL_LABEL
435 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
437 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
438 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
440 #undef TARGET_FUNCTION_VALUE
441 #define TARGET_FUNCTION_VALUE arm_function_value
443 #undef TARGET_LIBCALL_VALUE
444 #define TARGET_LIBCALL_VALUE arm_libcall_value
446 #undef TARGET_FUNCTION_VALUE_REGNO_P
447 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
449 #undef TARGET_ASM_OUTPUT_MI_THUNK
450 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
451 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
452 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
454 #undef TARGET_RTX_COSTS
455 #define TARGET_RTX_COSTS arm_rtx_costs
456 #undef TARGET_ADDRESS_COST
457 #define TARGET_ADDRESS_COST arm_address_cost
459 #undef TARGET_SHIFT_TRUNCATION_MASK
460 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
462 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
463 #undef TARGET_ARRAY_MODE_SUPPORTED_P
464 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
465 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
466 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
467 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
468 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
469 arm_autovectorize_vector_sizes
471 #undef TARGET_MACHINE_DEPENDENT_REORG
472 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
474 #undef TARGET_INIT_BUILTINS
475 #define TARGET_INIT_BUILTINS arm_init_builtins
476 #undef TARGET_EXPAND_BUILTIN
477 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
478 #undef TARGET_BUILTIN_DECL
479 #define TARGET_BUILTIN_DECL arm_builtin_decl
481 #undef TARGET_INIT_LIBFUNCS
482 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
484 #undef TARGET_PROMOTE_FUNCTION_MODE
485 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
486 #undef TARGET_PROMOTE_PROTOTYPES
487 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
488 #undef TARGET_PASS_BY_REFERENCE
489 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
490 #undef TARGET_ARG_PARTIAL_BYTES
491 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
492 #undef TARGET_FUNCTION_ARG
493 #define TARGET_FUNCTION_ARG arm_function_arg
494 #undef TARGET_FUNCTION_ARG_ADVANCE
495 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
496 #undef TARGET_FUNCTION_ARG_BOUNDARY
497 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
499 #undef TARGET_SETUP_INCOMING_VARARGS
500 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
502 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
503 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
505 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
506 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
507 #undef TARGET_TRAMPOLINE_INIT
508 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
509 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
510 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
512 #undef TARGET_WARN_FUNC_RETURN
513 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
515 #undef TARGET_DEFAULT_SHORT_ENUMS
516 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
518 #undef TARGET_ALIGN_ANON_BITFIELD
519 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
521 #undef TARGET_NARROW_VOLATILE_BITFIELD
522 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
524 #undef TARGET_CXX_GUARD_TYPE
525 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
527 #undef TARGET_CXX_GUARD_MASK_BIT
528 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
530 #undef TARGET_CXX_GET_COOKIE_SIZE
531 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
533 #undef TARGET_CXX_COOKIE_HAS_SIZE
534 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
536 #undef TARGET_CXX_CDTOR_RETURNS_THIS
537 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
539 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
540 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
542 #undef TARGET_CXX_USE_AEABI_ATEXIT
543 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
545 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
546 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
547 arm_cxx_determine_class_data_visibility
549 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
550 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
552 #undef TARGET_RETURN_IN_MSB
553 #define TARGET_RETURN_IN_MSB arm_return_in_msb
555 #undef TARGET_RETURN_IN_MEMORY
556 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
558 #undef TARGET_MUST_PASS_IN_STACK
559 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
561 #if ARM_UNWIND_INFO
562 #undef TARGET_ASM_UNWIND_EMIT
563 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
565 /* EABI unwinding tables use a different format for the typeinfo tables. */
566 #undef TARGET_ASM_TTYPE
567 #define TARGET_ASM_TTYPE arm_output_ttype
569 #undef TARGET_ARM_EABI_UNWINDER
570 #define TARGET_ARM_EABI_UNWINDER true
572 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
573 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
575 #undef TARGET_ASM_INIT_SECTIONS
576 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
577 #endif /* ARM_UNWIND_INFO */
579 #undef TARGET_DWARF_REGISTER_SPAN
580 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
582 #undef TARGET_CANNOT_COPY_INSN_P
583 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
585 #ifdef HAVE_AS_TLS
586 #undef TARGET_HAVE_TLS
587 #define TARGET_HAVE_TLS true
588 #endif
590 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
591 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
593 #undef TARGET_LEGITIMATE_CONSTANT_P
594 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
596 #undef TARGET_CANNOT_FORCE_CONST_MEM
597 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
599 #undef TARGET_MAX_ANCHOR_OFFSET
600 #define TARGET_MAX_ANCHOR_OFFSET 4095
602 /* The minimum is set such that the total size of the block
603 for a particular anchor is -4088 + 1 + 4095 bytes, which is
604 divisible by eight, ensuring natural spacing of anchors. */
605 #undef TARGET_MIN_ANCHOR_OFFSET
606 #define TARGET_MIN_ANCHOR_OFFSET -4088
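/* Concretely: anchored offsets range over [-4088, 4095], so one anchor
   covers 4095 - (-4088) + 1 = 8184 bytes, and 8184 = 8 * 1023.  */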
608 #undef TARGET_SCHED_ISSUE_RATE
609 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
611 #undef TARGET_MANGLE_TYPE
612 #define TARGET_MANGLE_TYPE arm_mangle_type
614 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
615 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
617 #undef TARGET_BUILD_BUILTIN_VA_LIST
618 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
619 #undef TARGET_EXPAND_BUILTIN_VA_START
620 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
621 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
622 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
624 #ifdef HAVE_AS_TLS
625 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
626 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
627 #endif
629 #undef TARGET_LEGITIMATE_ADDRESS_P
630 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
635 #undef TARGET_INVALID_PARAMETER_TYPE
636 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
638 #undef TARGET_INVALID_RETURN_TYPE
639 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
641 #undef TARGET_PROMOTED_TYPE
642 #define TARGET_PROMOTED_TYPE arm_promoted_type
644 #undef TARGET_CONVERT_TO_TYPE
645 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
647 #undef TARGET_SCALAR_MODE_SUPPORTED_P
648 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
650 #undef TARGET_FRAME_POINTER_REQUIRED
651 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
653 #undef TARGET_CAN_ELIMINATE
654 #define TARGET_CAN_ELIMINATE arm_can_eliminate
656 #undef TARGET_CONDITIONAL_REGISTER_USAGE
657 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
659 #undef TARGET_CLASS_LIKELY_SPILLED_P
660 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
662 #undef TARGET_VECTORIZE_BUILTINS
663 #define TARGET_VECTORIZE_BUILTINS
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
667 arm_builtin_vectorized_function
669 #undef TARGET_VECTOR_ALIGNMENT
670 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
672 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
673 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
674 arm_vector_alignment_reachable
676 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
677 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
678 arm_builtin_support_vector_misalignment
680 #undef TARGET_PREFERRED_RENAME_CLASS
681 #define TARGET_PREFERRED_RENAME_CLASS \
682 arm_preferred_rename_class
684 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
685 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
686 arm_vectorize_vec_perm_const_ok
688 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
689 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
690 arm_builtin_vectorization_cost
691 #undef TARGET_VECTORIZE_ADD_STMT_COST
692 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
694 #undef TARGET_CANONICALIZE_COMPARISON
695 #define TARGET_CANONICALIZE_COMPARISON \
696 arm_canonicalize_comparison
698 #undef TARGET_ASAN_SHADOW_OFFSET
699 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
701 #undef MAX_INSN_PER_IT_BLOCK
702 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
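/* For example, "ITTEE EQ" conditionalises the four following instructions;
   when -mrestrict-it sets arm_restrict_it, only single-instruction IT
   blocks are emitted, as preferred on ARMv8-A.  */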
704 #undef TARGET_CAN_USE_DOLOOP_P
705 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
707 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
708 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
710 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
711 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
713 #undef TARGET_SCHED_FUSION_PRIORITY
714 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
716 struct gcc_target targetm = TARGET_INITIALIZER;
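/* TARGET_INITIALIZER, from target-def.h, expands to an aggregate
   initializer that picks up every TARGET_* hook overridden above and the
   documented defaults for the rest.  */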
718 /* Obstack for minipool constant handling. */
719 static struct obstack minipool_obstack;
720 static char * minipool_startobj;
722 /* The maximum number of insns skipped which
723 will be conditionalised if possible. */
724 static int max_insns_skipped = 5;
726 extern FILE * asm_out_file;
728 /* True if we are currently building a constant table. */
729 int making_const_table;
731 /* The processor for which instructions should be scheduled. */
732 enum processor_type arm_tune = arm_none;
734 /* The current tuning set. */
735 const struct tune_params *current_tune;
737 /* Which floating point hardware to schedule for. */
738 int arm_fpu_attr;
 740 /* Which floating point hardware to use. */
741 const struct arm_fpu_desc *arm_fpu_desc;
743 /* Used for Thumb call_via trampolines. */
744 rtx thumb_call_via_label[14];
745 static int thumb_call_reg_needed;
747 /* The bits in this mask specify which
748 instructions we are allowed to generate. */
749 unsigned long insn_flags = 0;
751 /* The bits in this mask specify which instruction scheduling options should
752 be used. */
753 unsigned long tune_flags = 0;
755 /* The highest ARM architecture version supported by the
756 target. */
757 enum base_architecture arm_base_arch = BASE_ARCH_0;
759 /* The following are used in the arm.md file as equivalents to bits
760 in the above two flag variables. */
762 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
763 int arm_arch3m = 0;
765 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
766 int arm_arch4 = 0;
768 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
769 int arm_arch4t = 0;
771 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
772 int arm_arch5 = 0;
774 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
775 int arm_arch5e = 0;
777 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
778 int arm_arch6 = 0;
780 /* Nonzero if this chip supports the ARM 6K extensions. */
781 int arm_arch6k = 0;
783 /* Nonzero if instructions present in ARMv6-M can be used. */
784 int arm_arch6m = 0;
786 /* Nonzero if this chip supports the ARM 7 extensions. */
787 int arm_arch7 = 0;
789 /* Nonzero if instructions not present in the 'M' profile can be used. */
790 int arm_arch_notm = 0;
792 /* Nonzero if instructions present in ARMv7E-M can be used. */
793 int arm_arch7em = 0;
795 /* Nonzero if instructions present in ARMv8 can be used. */
796 int arm_arch8 = 0;
798 /* Nonzero if this chip can benefit from load scheduling. */
799 int arm_ld_sched = 0;
801 /* Nonzero if this chip is a StrongARM. */
802 int arm_tune_strongarm = 0;
804 /* Nonzero if this chip supports Intel Wireless MMX technology. */
805 int arm_arch_iwmmxt = 0;
807 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
808 int arm_arch_iwmmxt2 = 0;
810 /* Nonzero if this chip is an XScale. */
811 int arm_arch_xscale = 0;
 813 /* Nonzero if tuning for XScale. */
814 int arm_tune_xscale = 0;
816 /* Nonzero if we want to tune for stores that access the write-buffer.
817 This typically means an ARM6 or ARM7 with MMU or MPU. */
818 int arm_tune_wbuf = 0;
820 /* Nonzero if tuning for Cortex-A9. */
821 int arm_tune_cortex_a9 = 0;
823 /* Nonzero if generating Thumb instructions. */
824 int thumb_code = 0;
826 /* Nonzero if generating Thumb-1 instructions. */
827 int thumb1_code = 0;
829 /* Nonzero if we should define __THUMB_INTERWORK__ in the
830 preprocessor.
831 XXX This is a bit of a hack, it's intended to help work around
832 problems in GLD which doesn't understand that armv5t code is
833 interworking clean. */
834 int arm_cpp_interwork = 0;
836 /* Nonzero if chip supports Thumb 2. */
837 int arm_arch_thumb2;
839 /* Nonzero if chip supports integer division instruction. */
840 int arm_arch_arm_hwdiv;
841 int arm_arch_thumb_hwdiv;
 843 /* Nonzero if we should use Neon to handle 64-bit operations rather
844 than core registers. */
845 int prefer_neon_for_64bits = 0;
847 /* Nonzero if we shouldn't use literal pools. */
848 bool arm_disable_literal_pool = false;
850 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
851 we must report the mode of the memory reference from
852 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
853 machine_mode output_memory_reference_mode;
855 /* The register number to be used for the PIC offset register. */
856 unsigned arm_pic_register = INVALID_REGNUM;
858 enum arm_pcs arm_pcs_default;
860 /* For an explanation of these variables, see final_prescan_insn below. */
861 int arm_ccfsm_state;
862 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
863 enum arm_cond_code arm_current_cc;
865 rtx arm_target_insn;
866 int arm_target_label;
867 /* The number of conditionally executed insns, including the current insn. */
868 int arm_condexec_count = 0;
869 /* A bitmask specifying the patterns for the IT block.
870 Zero means do not output an IT block before this insn. */
871 int arm_condexec_mask = 0;
872 /* The number of bits used in arm_condexec_mask. */
873 int arm_condexec_masklen = 0;
875 /* Nonzero if chip supports the ARMv8 CRC instructions. */
876 int arm_arch_crc = 0;
878 /* Nonzero if the core has a very small, high-latency, multiply unit. */
879 int arm_m_profile_small_mul = 0;
881 /* The condition codes of the ARM, and the inverse function. */
 882 static const char * const arm_condition_codes[] =
 883 {
 884 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 885 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 886 };
888 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 889 int arm_regs_in_sequence[] =
 890 {
 891 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 892 };
894 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
895 #define streq(string1, string2) (strcmp (string1, string2) == 0)
897 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
898 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
899 | (1 << PIC_OFFSET_TABLE_REGNUM)))
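/* I.e. the Thumb-2 work registers are the low registers r0-r7 with the
   Thumb hard frame pointer and the PIC register masked out; the SP and PC
   bits are cleared as well, although they already fall outside the 0xff
   low-register mask.  */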
901 /* Initialization code. */
 903 struct processors
 904 {
 905 const char *const name;
 906 enum processor_type core;
 907 const char *arch;
 908 enum base_architecture base_arch;
 909 const unsigned long flags;
 910 const struct tune_params *const tune;
 911 };
914 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
915 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
916 prefetch_slots, \
917 l1_size, \
918 l1_line_size
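/* Example with made-up values: ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   expands to "4, 32768, 64", supplying the prefetch-slot, L1-cache-size
   and L1-line-size fields of a tune_params initializer, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */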
920 /* arm generic vectorizer costs. */
921 static const
922 struct cpu_vec_costs arm_default_vec_cost = {
923 1, /* scalar_stmt_cost. */
924 1, /* scalar load_cost. */
925 1, /* scalar_store_cost. */
926 1, /* vec_stmt_cost. */
927 1, /* vec_to_scalar_cost. */
928 1, /* scalar_to_vec_cost. */
929 1, /* vec_align_load_cost. */
930 1, /* vec_unalign_load_cost. */
931 1, /* vec_unalign_store_cost. */
932 1, /* vec_store_cost. */
933 3, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
937 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
938 #include "aarch-cost-tables.h"
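/* The tables below express latencies with COSTS_N_INSNS (N), i.e. N in
   the instruction-count units used by the RTX cost hooks (rtl.h scales it
   by the cost of a single fast instruction); a plain 0 means no cost
   beyond the baseline instruction.  */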
942 const struct cpu_cost_table cortexa9_extra_costs =
944 /* ALU */
946 0, /* arith. */
947 0, /* logical. */
948 0, /* shift. */
949 COSTS_N_INSNS (1), /* shift_reg. */
950 COSTS_N_INSNS (1), /* arith_shift. */
951 COSTS_N_INSNS (2), /* arith_shift_reg. */
952 0, /* log_shift. */
953 COSTS_N_INSNS (1), /* log_shift_reg. */
954 COSTS_N_INSNS (1), /* extend. */
955 COSTS_N_INSNS (2), /* extend_arith. */
956 COSTS_N_INSNS (1), /* bfi. */
957 COSTS_N_INSNS (1), /* bfx. */
958 0, /* clz. */
959 0, /* rev. */
960 0, /* non_exec. */
961 true /* non_exec_costs_exec. */
964 /* MULT SImode */
966 COSTS_N_INSNS (3), /* simple. */
967 COSTS_N_INSNS (3), /* flag_setting. */
968 COSTS_N_INSNS (2), /* extend. */
969 COSTS_N_INSNS (3), /* add. */
970 COSTS_N_INSNS (2), /* extend_add. */
971 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
973 /* MULT DImode */
975 0, /* simple (N/A). */
976 0, /* flag_setting (N/A). */
977 COSTS_N_INSNS (4), /* extend. */
978 0, /* add (N/A). */
979 COSTS_N_INSNS (4), /* extend_add. */
980 0 /* idiv (N/A). */
983 /* LD/ST */
985 COSTS_N_INSNS (2), /* load. */
986 COSTS_N_INSNS (2), /* load_sign_extend. */
987 COSTS_N_INSNS (2), /* ldrd. */
988 COSTS_N_INSNS (2), /* ldm_1st. */
989 1, /* ldm_regs_per_insn_1st. */
990 2, /* ldm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (5), /* loadf. */
992 COSTS_N_INSNS (5), /* loadd. */
993 COSTS_N_INSNS (1), /* load_unaligned. */
994 COSTS_N_INSNS (2), /* store. */
995 COSTS_N_INSNS (2), /* strd. */
996 COSTS_N_INSNS (2), /* stm_1st. */
997 1, /* stm_regs_per_insn_1st. */
998 2, /* stm_regs_per_insn_subsequent. */
999 COSTS_N_INSNS (1), /* storef. */
1000 COSTS_N_INSNS (1), /* stored. */
1001 COSTS_N_INSNS (1) /* store_unaligned. */
1004 /* FP SFmode */
1006 COSTS_N_INSNS (14), /* div. */
1007 COSTS_N_INSNS (4), /* mult. */
1008 COSTS_N_INSNS (7), /* mult_addsub. */
1009 COSTS_N_INSNS (30), /* fma. */
1010 COSTS_N_INSNS (3), /* addsub. */
1011 COSTS_N_INSNS (1), /* fpconst. */
1012 COSTS_N_INSNS (1), /* neg. */
1013 COSTS_N_INSNS (3), /* compare. */
1014 COSTS_N_INSNS (3), /* widen. */
1015 COSTS_N_INSNS (3), /* narrow. */
1016 COSTS_N_INSNS (3), /* toint. */
1017 COSTS_N_INSNS (3), /* fromint. */
1018 COSTS_N_INSNS (3) /* roundint. */
1020 /* FP DFmode */
1022 COSTS_N_INSNS (24), /* div. */
1023 COSTS_N_INSNS (5), /* mult. */
1024 COSTS_N_INSNS (8), /* mult_addsub. */
1025 COSTS_N_INSNS (30), /* fma. */
1026 COSTS_N_INSNS (3), /* addsub. */
1027 COSTS_N_INSNS (1), /* fpconst. */
1028 COSTS_N_INSNS (1), /* neg. */
1029 COSTS_N_INSNS (3), /* compare. */
1030 COSTS_N_INSNS (3), /* widen. */
1031 COSTS_N_INSNS (3), /* narrow. */
1032 COSTS_N_INSNS (3), /* toint. */
1033 COSTS_N_INSNS (3), /* fromint. */
1034 COSTS_N_INSNS (3) /* roundint. */
1037 /* Vector */
1039 COSTS_N_INSNS (1) /* alu. */
1043 const struct cpu_cost_table cortexa8_extra_costs =
1045 /* ALU */
1047 0, /* arith. */
1048 0, /* logical. */
1049 COSTS_N_INSNS (1), /* shift. */
1050 0, /* shift_reg. */
1051 COSTS_N_INSNS (1), /* arith_shift. */
1052 0, /* arith_shift_reg. */
1053 COSTS_N_INSNS (1), /* log_shift. */
1054 0, /* log_shift_reg. */
1055 0, /* extend. */
1056 0, /* extend_arith. */
1057 0, /* bfi. */
1058 0, /* bfx. */
1059 0, /* clz. */
1060 0, /* rev. */
1061 0, /* non_exec. */
1062 true /* non_exec_costs_exec. */
1065 /* MULT SImode */
1067 COSTS_N_INSNS (1), /* simple. */
1068 COSTS_N_INSNS (1), /* flag_setting. */
1069 COSTS_N_INSNS (1), /* extend. */
1070 COSTS_N_INSNS (1), /* add. */
1071 COSTS_N_INSNS (1), /* extend_add. */
1072 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1074 /* MULT DImode */
1076 0, /* simple (N/A). */
1077 0, /* flag_setting (N/A). */
1078 COSTS_N_INSNS (2), /* extend. */
1079 0, /* add (N/A). */
1080 COSTS_N_INSNS (2), /* extend_add. */
1081 0 /* idiv (N/A). */
1084 /* LD/ST */
1086 COSTS_N_INSNS (1), /* load. */
1087 COSTS_N_INSNS (1), /* load_sign_extend. */
1088 COSTS_N_INSNS (1), /* ldrd. */
1089 COSTS_N_INSNS (1), /* ldm_1st. */
1090 1, /* ldm_regs_per_insn_1st. */
1091 2, /* ldm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* loadf. */
1093 COSTS_N_INSNS (1), /* loadd. */
1094 COSTS_N_INSNS (1), /* load_unaligned. */
1095 COSTS_N_INSNS (1), /* store. */
1096 COSTS_N_INSNS (1), /* strd. */
1097 COSTS_N_INSNS (1), /* stm_1st. */
1098 1, /* stm_regs_per_insn_1st. */
1099 2, /* stm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (1), /* storef. */
1101 COSTS_N_INSNS (1), /* stored. */
1102 COSTS_N_INSNS (1) /* store_unaligned. */
1105 /* FP SFmode */
1107 COSTS_N_INSNS (36), /* div. */
1108 COSTS_N_INSNS (11), /* mult. */
1109 COSTS_N_INSNS (20), /* mult_addsub. */
1110 COSTS_N_INSNS (30), /* fma. */
1111 COSTS_N_INSNS (9), /* addsub. */
1112 COSTS_N_INSNS (3), /* fpconst. */
1113 COSTS_N_INSNS (3), /* neg. */
1114 COSTS_N_INSNS (6), /* compare. */
1115 COSTS_N_INSNS (4), /* widen. */
1116 COSTS_N_INSNS (4), /* narrow. */
1117 COSTS_N_INSNS (8), /* toint. */
1118 COSTS_N_INSNS (8), /* fromint. */
1119 COSTS_N_INSNS (8) /* roundint. */
1121 /* FP DFmode */
1123 COSTS_N_INSNS (64), /* div. */
1124 COSTS_N_INSNS (16), /* mult. */
1125 COSTS_N_INSNS (25), /* mult_addsub. */
1126 COSTS_N_INSNS (30), /* fma. */
1127 COSTS_N_INSNS (9), /* addsub. */
1128 COSTS_N_INSNS (3), /* fpconst. */
1129 COSTS_N_INSNS (3), /* neg. */
1130 COSTS_N_INSNS (6), /* compare. */
1131 COSTS_N_INSNS (6), /* widen. */
1132 COSTS_N_INSNS (6), /* narrow. */
1133 COSTS_N_INSNS (8), /* toint. */
1134 COSTS_N_INSNS (8), /* fromint. */
1135 COSTS_N_INSNS (8) /* roundint. */
1138 /* Vector */
1140 COSTS_N_INSNS (1) /* alu. */
1144 const struct cpu_cost_table cortexa5_extra_costs =
1146 /* ALU */
1148 0, /* arith. */
1149 0, /* logical. */
1150 COSTS_N_INSNS (1), /* shift. */
1151 COSTS_N_INSNS (1), /* shift_reg. */
1152 COSTS_N_INSNS (1), /* arith_shift. */
1153 COSTS_N_INSNS (1), /* arith_shift_reg. */
1154 COSTS_N_INSNS (1), /* log_shift. */
1155 COSTS_N_INSNS (1), /* log_shift_reg. */
1156 COSTS_N_INSNS (1), /* extend. */
1157 COSTS_N_INSNS (1), /* extend_arith. */
1158 COSTS_N_INSNS (1), /* bfi. */
1159 COSTS_N_INSNS (1), /* bfx. */
1160 COSTS_N_INSNS (1), /* clz. */
1161 COSTS_N_INSNS (1), /* rev. */
1162 0, /* non_exec. */
1163 true /* non_exec_costs_exec. */
1167 /* MULT SImode */
1169 0, /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (7) /* idiv. */
1176 /* MULT DImode */
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (1), /* extend. */
1181 0, /* add. */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1186 /* LD/ST */
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (6), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* loadf. */
1195 COSTS_N_INSNS (4), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (3), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (2), /* storef. */
1203 COSTS_N_INSNS (2), /* stored. */
1204 COSTS_N_INSNS (1) /* store_unaligned. */
1207 /* FP SFmode */
1209 COSTS_N_INSNS (15), /* div. */
1210 COSTS_N_INSNS (3), /* mult. */
1211 COSTS_N_INSNS (7), /* mult_addsub. */
1212 COSTS_N_INSNS (7), /* fma. */
1213 COSTS_N_INSNS (3), /* addsub. */
1214 COSTS_N_INSNS (3), /* fpconst. */
1215 COSTS_N_INSNS (3), /* neg. */
1216 COSTS_N_INSNS (3), /* compare. */
1217 COSTS_N_INSNS (3), /* widen. */
1218 COSTS_N_INSNS (3), /* narrow. */
1219 COSTS_N_INSNS (3), /* toint. */
1220 COSTS_N_INSNS (3), /* fromint. */
1221 COSTS_N_INSNS (3) /* roundint. */
1223 /* FP DFmode */
1225 COSTS_N_INSNS (30), /* div. */
1226 COSTS_N_INSNS (6), /* mult. */
1227 COSTS_N_INSNS (10), /* mult_addsub. */
1228 COSTS_N_INSNS (7), /* fma. */
1229 COSTS_N_INSNS (3), /* addsub. */
1230 COSTS_N_INSNS (3), /* fpconst. */
1231 COSTS_N_INSNS (3), /* neg. */
1232 COSTS_N_INSNS (3), /* compare. */
1233 COSTS_N_INSNS (3), /* widen. */
1234 COSTS_N_INSNS (3), /* narrow. */
1235 COSTS_N_INSNS (3), /* toint. */
1236 COSTS_N_INSNS (3), /* fromint. */
1237 COSTS_N_INSNS (3) /* roundint. */
1240 /* Vector */
1242 COSTS_N_INSNS (1) /* alu. */
1247 const struct cpu_cost_table cortexa7_extra_costs =
1249 /* ALU */
1251 0, /* arith. */
1252 0, /* logical. */
1253 COSTS_N_INSNS (1), /* shift. */
1254 COSTS_N_INSNS (1), /* shift_reg. */
1255 COSTS_N_INSNS (1), /* arith_shift. */
1256 COSTS_N_INSNS (1), /* arith_shift_reg. */
1257 COSTS_N_INSNS (1), /* log_shift. */
1258 COSTS_N_INSNS (1), /* log_shift_reg. */
1259 COSTS_N_INSNS (1), /* extend. */
1260 COSTS_N_INSNS (1), /* extend_arith. */
1261 COSTS_N_INSNS (1), /* bfi. */
1262 COSTS_N_INSNS (1), /* bfx. */
1263 COSTS_N_INSNS (1), /* clz. */
1264 COSTS_N_INSNS (1), /* rev. */
1265 0, /* non_exec. */
1266 true /* non_exec_costs_exec. */
1270 /* MULT SImode */
1272 0, /* simple. */
1273 COSTS_N_INSNS (1), /* flag_setting. */
1274 COSTS_N_INSNS (1), /* extend. */
1275 COSTS_N_INSNS (1), /* add. */
1276 COSTS_N_INSNS (1), /* extend_add. */
1277 COSTS_N_INSNS (7) /* idiv. */
1279 /* MULT DImode */
1281 0, /* simple (N/A). */
1282 0, /* flag_setting (N/A). */
1283 COSTS_N_INSNS (1), /* extend. */
1284 0, /* add. */
1285 COSTS_N_INSNS (2), /* extend_add. */
1286 0 /* idiv (N/A). */
1289 /* LD/ST */
1291 COSTS_N_INSNS (1), /* load. */
1292 COSTS_N_INSNS (1), /* load_sign_extend. */
1293 COSTS_N_INSNS (3), /* ldrd. */
1294 COSTS_N_INSNS (1), /* ldm_1st. */
1295 1, /* ldm_regs_per_insn_1st. */
1296 2, /* ldm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* loadf. */
1298 COSTS_N_INSNS (2), /* loadd. */
1299 COSTS_N_INSNS (1), /* load_unaligned. */
1300 COSTS_N_INSNS (1), /* store. */
1301 COSTS_N_INSNS (3), /* strd. */
1302 COSTS_N_INSNS (1), /* stm_1st. */
1303 1, /* stm_regs_per_insn_1st. */
1304 2, /* stm_regs_per_insn_subsequent. */
1305 COSTS_N_INSNS (2), /* storef. */
1306 COSTS_N_INSNS (2), /* stored. */
1307 COSTS_N_INSNS (1) /* store_unaligned. */
1310 /* FP SFmode */
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1326 /* FP DFmode */
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1343 /* Vector */
1345 COSTS_N_INSNS (1) /* alu. */
1349 const struct cpu_cost_table cortexa12_extra_costs =
1351 /* ALU */
1353 0, /* arith. */
1354 0, /* logical. */
1355 0, /* shift. */
1356 COSTS_N_INSNS (1), /* shift_reg. */
1357 COSTS_N_INSNS (1), /* arith_shift. */
1358 COSTS_N_INSNS (1), /* arith_shift_reg. */
1359 COSTS_N_INSNS (1), /* log_shift. */
1360 COSTS_N_INSNS (1), /* log_shift_reg. */
1361 0, /* extend. */
1362 COSTS_N_INSNS (1), /* extend_arith. */
1363 0, /* bfi. */
1364 COSTS_N_INSNS (1), /* bfx. */
1365 COSTS_N_INSNS (1), /* clz. */
1366 COSTS_N_INSNS (1), /* rev. */
1367 0, /* non_exec. */
1368 true /* non_exec_costs_exec. */
1370 /* MULT SImode */
1373 COSTS_N_INSNS (2), /* simple. */
1374 COSTS_N_INSNS (3), /* flag_setting. */
1375 COSTS_N_INSNS (2), /* extend. */
1376 COSTS_N_INSNS (3), /* add. */
1377 COSTS_N_INSNS (2), /* extend_add. */
1378 COSTS_N_INSNS (18) /* idiv. */
1380 /* MULT DImode */
1382 0, /* simple (N/A). */
1383 0, /* flag_setting (N/A). */
1384 COSTS_N_INSNS (3), /* extend. */
1385 0, /* add (N/A). */
1386 COSTS_N_INSNS (3), /* extend_add. */
1387 0 /* idiv (N/A). */
1390 /* LD/ST */
1392 COSTS_N_INSNS (3), /* load. */
1393 COSTS_N_INSNS (3), /* load_sign_extend. */
1394 COSTS_N_INSNS (3), /* ldrd. */
1395 COSTS_N_INSNS (3), /* ldm_1st. */
1396 1, /* ldm_regs_per_insn_1st. */
1397 2, /* ldm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (3), /* loadf. */
1399 COSTS_N_INSNS (3), /* loadd. */
1400 0, /* load_unaligned. */
1401 0, /* store. */
1402 0, /* strd. */
1403 0, /* stm_1st. */
1404 1, /* stm_regs_per_insn_1st. */
1405 2, /* stm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* storef. */
1407 COSTS_N_INSNS (2), /* stored. */
1408 0 /* store_unaligned. */
1411 /* FP SFmode */
1413 COSTS_N_INSNS (17), /* div. */
1414 COSTS_N_INSNS (4), /* mult. */
1415 COSTS_N_INSNS (8), /* mult_addsub. */
1416 COSTS_N_INSNS (8), /* fma. */
1417 COSTS_N_INSNS (4), /* addsub. */
1418 COSTS_N_INSNS (2), /* fpconst. */
1419 COSTS_N_INSNS (2), /* neg. */
1420 COSTS_N_INSNS (2), /* compare. */
1421 COSTS_N_INSNS (4), /* widen. */
1422 COSTS_N_INSNS (4), /* narrow. */
1423 COSTS_N_INSNS (4), /* toint. */
1424 COSTS_N_INSNS (4), /* fromint. */
1425 COSTS_N_INSNS (4) /* roundint. */
1427 /* FP DFmode */
1429 COSTS_N_INSNS (31), /* div. */
1430 COSTS_N_INSNS (4), /* mult. */
1431 COSTS_N_INSNS (8), /* mult_addsub. */
1432 COSTS_N_INSNS (8), /* fma. */
1433 COSTS_N_INSNS (4), /* addsub. */
1434 COSTS_N_INSNS (2), /* fpconst. */
1435 COSTS_N_INSNS (2), /* neg. */
1436 COSTS_N_INSNS (2), /* compare. */
1437 COSTS_N_INSNS (4), /* widen. */
1438 COSTS_N_INSNS (4), /* narrow. */
1439 COSTS_N_INSNS (4), /* toint. */
1440 COSTS_N_INSNS (4), /* fromint. */
1441 COSTS_N_INSNS (4) /* roundint. */
1444 /* Vector */
1446 COSTS_N_INSNS (1) /* alu. */
1450 const struct cpu_cost_table cortexa15_extra_costs =
1452 /* ALU */
1454 0, /* arith. */
1455 0, /* logical. */
1456 0, /* shift. */
1457 0, /* shift_reg. */
1458 COSTS_N_INSNS (1), /* arith_shift. */
1459 COSTS_N_INSNS (1), /* arith_shift_reg. */
1460 COSTS_N_INSNS (1), /* log_shift. */
1461 COSTS_N_INSNS (1), /* log_shift_reg. */
1462 0, /* extend. */
1463 COSTS_N_INSNS (1), /* extend_arith. */
1464 COSTS_N_INSNS (1), /* bfi. */
1465 0, /* bfx. */
1466 0, /* clz. */
1467 0, /* rev. */
1468 0, /* non_exec. */
1469 true /* non_exec_costs_exec. */
1471 /* MULT SImode */
1474 COSTS_N_INSNS (2), /* simple. */
1475 COSTS_N_INSNS (3), /* flag_setting. */
1476 COSTS_N_INSNS (2), /* extend. */
1477 COSTS_N_INSNS (2), /* add. */
1478 COSTS_N_INSNS (2), /* extend_add. */
1479 COSTS_N_INSNS (18) /* idiv. */
1481 /* MULT DImode */
1483 0, /* simple (N/A). */
1484 0, /* flag_setting (N/A). */
1485 COSTS_N_INSNS (3), /* extend. */
1486 0, /* add (N/A). */
1487 COSTS_N_INSNS (3), /* extend_add. */
1488 0 /* idiv (N/A). */
1491 /* LD/ST */
1493 COSTS_N_INSNS (3), /* load. */
1494 COSTS_N_INSNS (3), /* load_sign_extend. */
1495 COSTS_N_INSNS (3), /* ldrd. */
1496 COSTS_N_INSNS (4), /* ldm_1st. */
1497 1, /* ldm_regs_per_insn_1st. */
1498 2, /* ldm_regs_per_insn_subsequent. */
1499 COSTS_N_INSNS (4), /* loadf. */
1500 COSTS_N_INSNS (4), /* loadd. */
1501 0, /* load_unaligned. */
1502 0, /* store. */
1503 0, /* strd. */
1504 COSTS_N_INSNS (1), /* stm_1st. */
1505 1, /* stm_regs_per_insn_1st. */
1506 2, /* stm_regs_per_insn_subsequent. */
1507 0, /* storef. */
1508 0, /* stored. */
1509 0 /* store_unaligned. */
1512 /* FP SFmode */
1514 COSTS_N_INSNS (17), /* div. */
1515 COSTS_N_INSNS (4), /* mult. */
1516 COSTS_N_INSNS (8), /* mult_addsub. */
1517 COSTS_N_INSNS (8), /* fma. */
1518 COSTS_N_INSNS (4), /* addsub. */
1519 COSTS_N_INSNS (2), /* fpconst. */
1520 COSTS_N_INSNS (2), /* neg. */
1521 COSTS_N_INSNS (5), /* compare. */
1522 COSTS_N_INSNS (4), /* widen. */
1523 COSTS_N_INSNS (4), /* narrow. */
1524 COSTS_N_INSNS (4), /* toint. */
1525 COSTS_N_INSNS (4), /* fromint. */
1526 COSTS_N_INSNS (4) /* roundint. */
1528 /* FP DFmode */
1530 COSTS_N_INSNS (31), /* div. */
1531 COSTS_N_INSNS (4), /* mult. */
1532 COSTS_N_INSNS (8), /* mult_addsub. */
1533 COSTS_N_INSNS (8), /* fma. */
1534 COSTS_N_INSNS (4), /* addsub. */
1535 COSTS_N_INSNS (2), /* fpconst. */
1536 COSTS_N_INSNS (2), /* neg. */
1537 COSTS_N_INSNS (2), /* compare. */
1538 COSTS_N_INSNS (4), /* widen. */
1539 COSTS_N_INSNS (4), /* narrow. */
1540 COSTS_N_INSNS (4), /* toint. */
1541 COSTS_N_INSNS (4), /* fromint. */
1542 COSTS_N_INSNS (4) /* roundint. */
1545 /* Vector */
1547 COSTS_N_INSNS (1) /* alu. */
1551 const struct cpu_cost_table v7m_extra_costs =
1553 /* ALU */
1555 0, /* arith. */
1556 0, /* logical. */
1557 0, /* shift. */
1558 0, /* shift_reg. */
1559 0, /* arith_shift. */
1560 COSTS_N_INSNS (1), /* arith_shift_reg. */
1561 0, /* log_shift. */
1562 COSTS_N_INSNS (1), /* log_shift_reg. */
1563 0, /* extend. */
1564 COSTS_N_INSNS (1), /* extend_arith. */
1565 0, /* bfi. */
1566 0, /* bfx. */
1567 0, /* clz. */
1568 0, /* rev. */
1569 COSTS_N_INSNS (1), /* non_exec. */
1570 false /* non_exec_costs_exec. */
1573 /* MULT SImode */
1575 COSTS_N_INSNS (1), /* simple. */
1576 COSTS_N_INSNS (1), /* flag_setting. */
1577 COSTS_N_INSNS (2), /* extend. */
1578 COSTS_N_INSNS (1), /* add. */
1579 COSTS_N_INSNS (3), /* extend_add. */
1580 COSTS_N_INSNS (8) /* idiv. */
1582 /* MULT DImode */
1584 0, /* simple (N/A). */
1585 0, /* flag_setting (N/A). */
1586 COSTS_N_INSNS (2), /* extend. */
1587 0, /* add (N/A). */
1588 COSTS_N_INSNS (3), /* extend_add. */
1589 0 /* idiv (N/A). */
1592 /* LD/ST */
1594 COSTS_N_INSNS (2), /* load. */
1595 0, /* load_sign_extend. */
1596 COSTS_N_INSNS (3), /* ldrd. */
1597 COSTS_N_INSNS (2), /* ldm_1st. */
1598 1, /* ldm_regs_per_insn_1st. */
1599 1, /* ldm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* loadf. */
1601 COSTS_N_INSNS (3), /* loadd. */
1602 COSTS_N_INSNS (1), /* load_unaligned. */
1603 COSTS_N_INSNS (2), /* store. */
1604 COSTS_N_INSNS (3), /* strd. */
1605 COSTS_N_INSNS (2), /* stm_1st. */
1606 1, /* stm_regs_per_insn_1st. */
1607 1, /* stm_regs_per_insn_subsequent. */
1608 COSTS_N_INSNS (2), /* storef. */
1609 COSTS_N_INSNS (3), /* stored. */
1610 COSTS_N_INSNS (1) /* store_unaligned. */
1613 /* FP SFmode */
1615 COSTS_N_INSNS (7), /* div. */
1616 COSTS_N_INSNS (2), /* mult. */
1617 COSTS_N_INSNS (5), /* mult_addsub. */
1618 COSTS_N_INSNS (3), /* fma. */
1619 COSTS_N_INSNS (1), /* addsub. */
1620 0, /* fpconst. */
1621 0, /* neg. */
1622 0, /* compare. */
1623 0, /* widen. */
1624 0, /* narrow. */
1625 0, /* toint. */
1626 0, /* fromint. */
1627 0 /* roundint. */
1629 /* FP DFmode */
1631 COSTS_N_INSNS (15), /* div. */
1632 COSTS_N_INSNS (5), /* mult. */
1633 COSTS_N_INSNS (7), /* mult_addsub. */
1634 COSTS_N_INSNS (7), /* fma. */
1635 COSTS_N_INSNS (3), /* addsub. */
1636 0, /* fpconst. */
1637 0, /* neg. */
1638 0, /* compare. */
1639 0, /* widen. */
1640 0, /* narrow. */
1641 0, /* toint. */
1642 0, /* fromint. */
1643 0 /* roundint. */
1646 /* Vector */
1648 COSTS_N_INSNS (1) /* alu. */
1652 const struct tune_params arm_slowmul_tune =
1654 arm_slowmul_rtx_costs,
1655 NULL,
1656 NULL, /* Sched adj cost. */
1657 3, /* Constant limit. */
1658 5, /* Max cond insns. */
1659 ARM_PREFETCH_NOT_BENEFICIAL,
1660 true, /* Prefer constant pool. */
1661 arm_default_branch_cost,
1662 false, /* Prefer LDRD/STRD. */
1663 {true, true}, /* Prefer non short circuit. */
1664 &arm_default_vec_cost, /* Vectorizer costs. */
1665 false, /* Prefer Neon for 64-bits bitops. */
1666 false, false, /* Prefer 32-bit encodings. */
1667 false, /* Prefer Neon for stringops. */
1668 8 /* Maximum insns to inline memset. */
1671 const struct tune_params arm_fastmul_tune =
1673 arm_fastmul_rtx_costs,
1674 NULL,
1675 NULL, /* Sched adj cost. */
1676 1, /* Constant limit. */
1677 5, /* Max cond insns. */
1678 ARM_PREFETCH_NOT_BENEFICIAL,
1679 true, /* Prefer constant pool. */
1680 arm_default_branch_cost,
1681 false, /* Prefer LDRD/STRD. */
1682 {true, true}, /* Prefer non short circuit. */
1683 &arm_default_vec_cost, /* Vectorizer costs. */
1684 false, /* Prefer Neon for 64-bits bitops. */
1685 false, false, /* Prefer 32-bit encodings. */
1686 false, /* Prefer Neon for stringops. */
1687 8 /* Maximum insns to inline memset. */
1690 /* StrongARM has early execution of branches, so a sequence that is worth
1691 skipping is shorter. Set max_insns_skipped to a lower value. */
1693 const struct tune_params arm_strongarm_tune =
1695 arm_fastmul_rtx_costs,
1696 NULL,
1697 NULL, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 3, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL,
1701 true, /* Prefer constant pool. */
1702 arm_default_branch_cost,
1703 false, /* Prefer LDRD/STRD. */
1704 {true, true}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost, /* Vectorizer costs. */
1706 false, /* Prefer Neon for 64-bits bitops. */
1707 false, false, /* Prefer 32-bit encodings. */
1708 false, /* Prefer Neon for stringops. */
1709 8 /* Maximum insns to inline memset. */
1712 const struct tune_params arm_xscale_tune =
1714 arm_xscale_rtx_costs,
1715 NULL,
1716 xscale_sched_adjust_cost,
1717 2, /* Constant limit. */
1718 3, /* Max cond insns. */
1719 ARM_PREFETCH_NOT_BENEFICIAL,
1720 true, /* Prefer constant pool. */
1721 arm_default_branch_cost,
1722 false, /* Prefer LDRD/STRD. */
1723 {true, true}, /* Prefer non short circuit. */
1724 &arm_default_vec_cost, /* Vectorizer costs. */
1725 false, /* Prefer Neon for 64-bits bitops. */
1726 false, false, /* Prefer 32-bit encodings. */
1727 false, /* Prefer Neon for stringops. */
1728 8 /* Maximum insns to inline memset. */
1731 const struct tune_params arm_9e_tune =
1733 arm_9e_rtx_costs,
1734 NULL,
1735 NULL, /* Sched adj cost. */
1736 1, /* Constant limit. */
1737 5, /* Max cond insns. */
1738 ARM_PREFETCH_NOT_BENEFICIAL,
1739 true, /* Prefer constant pool. */
1740 arm_default_branch_cost,
1741 false, /* Prefer LDRD/STRD. */
1742 {true, true}, /* Prefer non short circuit. */
1743 &arm_default_vec_cost, /* Vectorizer costs. */
1744 false, /* Prefer Neon for 64-bits bitops. */
1745 false, false, /* Prefer 32-bit encodings. */
1746 false, /* Prefer Neon for stringops. */
1747 8 /* Maximum insns to inline memset. */
1750 const struct tune_params arm_v6t2_tune =
1752 arm_9e_rtx_costs,
1753 NULL,
1754 NULL, /* Sched adj cost. */
1755 1, /* Constant limit. */
1756 5, /* Max cond insns. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 false, /* Prefer constant pool. */
1759 arm_default_branch_cost,
1760 false, /* Prefer LDRD/STRD. */
1761 {true, true}, /* Prefer non short circuit. */
1762 &arm_default_vec_cost, /* Vectorizer costs. */
1763 false, /* Prefer Neon for 64-bits bitops. */
1764 false, false, /* Prefer 32-bit encodings. */
1765 false, /* Prefer Neon for stringops. */
1766 8 /* Maximum insns to inline memset. */
1769 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1770 const struct tune_params arm_cortex_tune =
1772 arm_9e_rtx_costs,
1773 &generic_extra_costs,
1774 NULL, /* Sched adj cost. */
1775 1, /* Constant limit. */
1776 5, /* Max cond insns. */
1777 ARM_PREFETCH_NOT_BENEFICIAL,
1778 false, /* Prefer constant pool. */
1779 arm_default_branch_cost,
1780 false, /* Prefer LDRD/STRD. */
1781 {true, true}, /* Prefer non short circuit. */
1782 &arm_default_vec_cost, /* Vectorizer costs. */
1783 false, /* Prefer Neon for 64-bits bitops. */
1784 false, false, /* Prefer 32-bit encodings. */
1785 false, /* Prefer Neon for stringops. */
1786 8 /* Maximum insns to inline memset. */
1789 const struct tune_params arm_cortex_a8_tune =
1791 arm_9e_rtx_costs,
1792 &cortexa8_extra_costs,
1793 NULL, /* Sched adj cost. */
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 false, /* Prefer constant pool. */
1798 arm_default_branch_cost,
1799 false, /* Prefer LDRD/STRD. */
1800 {true, true}, /* Prefer non short circuit. */
1801 &arm_default_vec_cost, /* Vectorizer costs. */
1802 false, /* Prefer Neon for 64-bits bitops. */
1803 false, false, /* Prefer 32-bit encodings. */
1804 true, /* Prefer Neon for stringops. */
1805 8 /* Maximum insns to inline memset. */
1808 const struct tune_params arm_cortex_a7_tune =
1810 arm_9e_rtx_costs,
1811 &cortexa7_extra_costs,
1812 NULL,
1813 1, /* Constant limit. */
1814 5, /* Max cond insns. */
1815 ARM_PREFETCH_NOT_BENEFICIAL,
1816 false, /* Prefer constant pool. */
1817 arm_default_branch_cost,
1818 false, /* Prefer LDRD/STRD. */
1819 {true, true}, /* Prefer non short circuit. */
1820 &arm_default_vec_cost, /* Vectorizer costs. */
1821 false, /* Prefer Neon for 64-bits bitops. */
1822 false, false, /* Prefer 32-bit encodings. */
1823 true, /* Prefer Neon for stringops. */
1824 8 /* Maximum insns to inline memset. */
1827 const struct tune_params arm_cortex_a15_tune =
1829 arm_9e_rtx_costs,
1830 &cortexa15_extra_costs,
1831 NULL, /* Sched adj cost. */
1832 1, /* Constant limit. */
1833 2, /* Max cond insns. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 false, /* Prefer constant pool. */
1836 arm_default_branch_cost,
1837 true, /* Prefer LDRD/STRD. */
1838 {true, true}, /* Prefer non short circuit. */
1839 &arm_default_vec_cost, /* Vectorizer costs. */
1840 false, /* Prefer Neon for 64-bits bitops. */
1841 true, true, /* Prefer 32-bit encodings. */
1842 true, /* Prefer Neon for stringops. */
1843 8 /* Maximum insns to inline memset. */
1846 const struct tune_params arm_cortex_a53_tune =
1848 arm_9e_rtx_costs,
1849 &cortexa53_extra_costs,
1850 NULL, /* Scheduler cost adjustment. */
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 false, /* Prefer constant pool. */
1855 arm_default_branch_cost,
1856 false, /* Prefer LDRD/STRD. */
1857 {true, true}, /* Prefer non short circuit. */
1858 &arm_default_vec_cost, /* Vectorizer costs. */
1859 false, /* Prefer Neon for 64-bits bitops. */
1860 false, false, /* Prefer 32-bit encodings. */
1861 false, /* Prefer Neon for stringops. */
1862 8 /* Maximum insns to inline memset. */
1865 const struct tune_params arm_cortex_a57_tune =
1867 arm_9e_rtx_costs,
1868 &cortexa57_extra_costs,
1869 NULL, /* Scheduler cost adjustment. */
1870 1, /* Constant limit. */
1871 2, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 true, /* Prefer LDRD/STRD. */
1876 {true, true}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 true, true, /* Prefer 32-bit encodings. */
1880 false, /* Prefer Neon for stringops. */
1881 8 /* Maximum insns to inline memset. */
1884 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1885 less appealing. Set max_insns_skipped to a low value. */
1887 const struct tune_params arm_cortex_a5_tune =
1889 arm_9e_rtx_costs,
1890 &cortexa5_extra_costs,
1891 NULL, /* Sched adj cost. */
1892 1, /* Constant limit. */
1893 1, /* Max cond insns. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 false, /* Prefer constant pool. */
1896 arm_cortex_a5_branch_cost,
1897 false, /* Prefer LDRD/STRD. */
1898 {false, false}, /* Prefer non short circuit. */
1899 &arm_default_vec_cost, /* Vectorizer costs. */
1900 false, /* Prefer Neon for 64-bits bitops. */
1901 false, false, /* Prefer 32-bit encodings. */
1902 true, /* Prefer Neon for stringops. */
1903 8 /* Maximum insns to inline memset. */
1906 const struct tune_params arm_cortex_a9_tune =
1908 arm_9e_rtx_costs,
1909 &cortexa9_extra_costs,
1910 cortex_a9_sched_adjust_cost,
1911 1, /* Constant limit. */
1912 5, /* Max cond insns. */
1913 ARM_PREFETCH_BENEFICIAL(4,32,32),
1914 false, /* Prefer constant pool. */
1915 arm_default_branch_cost,
1916 false, /* Prefer LDRD/STRD. */
1917 {true, true}, /* Prefer non short circuit. */
1918 &arm_default_vec_cost, /* Vectorizer costs. */
1919 false, /* Prefer Neon for 64-bits bitops. */
1920 false, false, /* Prefer 32-bit encodings. */
1921 false, /* Prefer Neon for stringops. */
1922 8 /* Maximum insns to inline memset. */
1925 const struct tune_params arm_cortex_a12_tune =
1927 arm_9e_rtx_costs,
1928 &cortexa12_extra_costs,
1929 NULL,
1930 1, /* Constant limit. */
1931 5, /* Max cond insns. */
1932 ARM_PREFETCH_BENEFICIAL(4,32,32),
1933 false, /* Prefer constant pool. */
1934 arm_default_branch_cost,
1935 true, /* Prefer LDRD/STRD. */
1936 {true, true}, /* Prefer non short circuit. */
1937 &arm_default_vec_cost, /* Vectorizer costs. */
1938 false, /* Prefer Neon for 64-bits bitops. */
1939 false, false, /* Prefer 32-bit encodings. */
1940 true, /* Prefer Neon for stringops. */
1941 8 /* Maximum insns to inline memset. */
1944 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT take a single
1945 cycle to execute each, so materialising a 32-bit constant with the pair takes
1946 two cycles. An LDR from the constant pool also takes two cycles to execute,
1947 but mildly increases pipelining opportunity (consecutive loads/stores can be
1948 pipelined together, saving one cycle), and may also improve icache
1949 utilisation. Hence we prefer the constant pool for such processors. */
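/* As an illustration (the register and literal-pool label below are chosen
   arbitrarily, not taken from compiler output), the constant 0x12345678 can be
   materialised either with the immediate pair

	movw	r0, #0x5678
	movt	r0, #0x1234

   or with a PC-relative constant-pool load

	ldr	r0, .LC0	@ .LC0: .word	0x12345678

   Both forms cost two cycles on such cores, but the LDR form can pipeline with
   neighbouring loads/stores as described above.  */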
1951 const struct tune_params arm_v7m_tune =
1953 arm_9e_rtx_costs,
1954 &v7m_extra_costs,
1955 NULL, /* Sched adj cost. */
1956 1, /* Constant limit. */
1957 2, /* Max cond insns. */
1958 ARM_PREFETCH_NOT_BENEFICIAL,
1959 true, /* Prefer constant pool. */
1960 arm_cortex_m_branch_cost,
1961 false, /* Prefer LDRD/STRD. */
1962 {false, false}, /* Prefer non short circuit. */
1963 &arm_default_vec_cost, /* Vectorizer costs. */
1964 false, /* Prefer Neon for 64-bits bitops. */
1965 false, false, /* Prefer 32-bit encodings. */
1966 false, /* Prefer Neon for stringops. */
1967 8 /* Maximum insns to inline memset. */
1970 /* Cortex-M7 tuning. */
1972 const struct tune_params arm_cortex_m7_tune =
1974 arm_9e_rtx_costs,
1975 &v7m_extra_costs,
1976 NULL, /* Sched adj cost. */
1977 0, /* Constant limit. */
1978 0, /* Max cond insns. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 true, /* Prefer constant pool. */
1981 arm_cortex_m_branch_cost,
1982 false, /* Prefer LDRD/STRD. */
1983 {true, true}, /* Prefer non short circuit. */
1984 &arm_default_vec_cost, /* Vectorizer costs. */
1985 false, /* Prefer Neon for 64-bits bitops. */
1986 false, false, /* Prefer 32-bit encodings. */
1987 false, /* Prefer Neon for stringops. */
1988 8 /* Maximum insns to inline memset. */
1991 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1992 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1993 const struct tune_params arm_v6m_tune =
1995 arm_9e_rtx_costs,
1996 NULL,
1997 NULL, /* Sched adj cost. */
1998 1, /* Constant limit. */
1999 5, /* Max cond insns. */
2000 ARM_PREFETCH_NOT_BENEFICIAL,
2001 false, /* Prefer constant pool. */
2002 arm_default_branch_cost,
2003 false, /* Prefer LDRD/STRD. */
2004 {false, false}, /* Prefer non short circuit. */
2005 &arm_default_vec_cost, /* Vectorizer costs. */
2006 false, /* Prefer Neon for 64-bits bitops. */
2007 false, false, /* Prefer 32-bit encodings. */
2008 false, /* Prefer Neon for stringops. */
2009 8 /* Maximum insns to inline memset. */
2012 const struct tune_params arm_fa726te_tune =
2014 arm_9e_rtx_costs,
2015 NULL,
2016 fa726te_sched_adjust_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 true, /* Prefer constant pool. */
2021 arm_default_branch_cost,
2022 false, /* Prefer LDRD/STRD. */
2023 {true, true}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2032 /* Not all of these give usefully different compilation alternatives,
2033 but there is no simple way of generalizing them. */
2034 static const struct processors all_cores[] =
2036 /* ARM Cores */
2037 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2038 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2039 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2040 #include "arm-cores.def"
2041 #undef ARM_CORE
2042 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2045 static const struct processors all_architectures[] =
2047 /* ARM Architectures */
2048 /* We don't specify tuning costs here as it will be figured out
2049 from the core. */
2051 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2052 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2053 #include "arm-arches.def"
2054 #undef ARM_ARCH
2055 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2059 /* These are populated as command-line arguments are processed, or NULL
2060 if not specified. */
2061 static const struct processors *arm_selected_arch;
2062 static const struct processors *arm_selected_cpu;
2063 static const struct processors *arm_selected_tune;
2065 /* The name of the preprocessor macro to define for this architecture. */
2067 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2069 /* Available values for -mfpu=. */
2071 static const struct arm_fpu_desc all_fpus[] =
2073 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2074 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2075 #include "arm-fpus.def"
2076 #undef ARM_FPU
2080 /* Supported TLS relocations. */
2082 enum tls_reloc {
2083 TLS_GD32,
2084 TLS_LDM32,
2085 TLS_LDO32,
2086 TLS_IE32,
2087 TLS_LE32,
2088 TLS_DESCSEQ /* GNU scheme */
2091 /* The maximum number of insns to be used when loading a constant. */
2092 inline static int
2093 arm_constant_limit (bool size_p)
2095 return size_p ? 1 : current_tune->constant_limit;
2098 /* Emit an insn that's a simple single-set. Both the operands must be known
2099 to be valid. */
2100 inline static rtx_insn *
2101 emit_set_insn (rtx x, rtx y)
2103 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2106 /* Return the number of bits set in VALUE. */
2107 static unsigned
2108 bit_count (unsigned long value)
2110 unsigned long count = 0;
2112 while (value)
2114 count++;
2115 value &= value - 1; /* Clear the least-significant set bit. */
2118 return count;
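/* A short worked example of the loop above: for VALUE = 0x2C (binary 101100)
   each iteration clears one set bit, 101100 -> 101000 -> 100000 -> 0, so
   bit_count returns 3.  */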
2121 typedef struct
2123 machine_mode mode;
2124 const char *name;
2125 } arm_fixed_mode_set;
2127 /* A small helper for setting fixed-point libfuncs. */
2129 static void
2130 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2131 const char *funcname, const char *modename,
2132 int num_suffix)
2134 char buffer[50];
2136 if (num_suffix == 0)
2137 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2138 else
2139 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2141 set_optab_libfunc (optable, mode, buffer);
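/* For example, the loop in arm_init_libfuncs below effectively makes the call
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   which registers the name "__gnu_addqq3" for QQmode addition; a NUM_SUFFIX
   of 0 simply omits the trailing digit.  */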
2144 static void
2145 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2146 machine_mode from, const char *funcname,
2147 const char *toname, const char *fromname)
2149 char buffer[50];
2150 const char *maybe_suffix_2 = "";
2152 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2153 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2154 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2155 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2156 maybe_suffix_2 = "2";
2158 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2159 maybe_suffix_2);
2161 set_conv_libfunc (optable, to, from, buffer);
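/* For example, converting SImode ("si") to SQmode ("sq") with the fract optab
   yields the name "__gnu_fractsisq" (no "2" suffix, since SImode is not a
   fixed-point mode), while SQmode to DQmode yields "__gnu_fractsqdq2" because
   both are signed fract modes.  */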
2164 /* Set up library functions unique to ARM. */
2166 static void
2167 arm_init_libfuncs (void)
2169 /* For Linux, we have access to kernel support for atomic operations. */
2170 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2171 init_sync_libfuncs (2 * UNITS_PER_WORD);
2173 /* There are no special library functions unless we are using the
2174 ARM BPABI. */
2175 if (!TARGET_BPABI)
2176 return;
2178 /* The functions below are described in Section 4 of the "Run-Time
2179 ABI for the ARM architecture", Version 1.0. */
2181 /* Double-precision floating-point arithmetic. Table 2. */
2182 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2183 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2184 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2185 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2186 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2188 /* Double-precision comparisons. Table 3. */
2189 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2190 set_optab_libfunc (ne_optab, DFmode, NULL);
2191 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2192 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2193 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2194 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2195 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2197 /* Single-precision floating-point arithmetic. Table 4. */
2198 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2199 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2200 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2201 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2202 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2204 /* Single-precision comparisons. Table 5. */
2205 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2206 set_optab_libfunc (ne_optab, SFmode, NULL);
2207 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2208 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2209 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2210 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2211 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2213 /* Floating-point to integer conversions. Table 6. */
2214 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2215 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2216 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2217 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2218 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2219 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2220 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2221 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2223 /* Conversions between floating types. Table 7. */
2224 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2225 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2227 /* Integer to floating-point conversions. Table 8. */
2228 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2229 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2230 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2231 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2232 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2233 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2234 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2235 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2237 /* Long long. Table 9. */
2238 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2239 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2240 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2241 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2242 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2243 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2244 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2245 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2247 /* Integer (32/32->32) division. \S 4.3.1. */
2248 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2249 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2251 /* The divmod functions are designed so that they can be used for
2252 plain division, even though they return both the quotient and the
2253 remainder. The quotient is returned in the usual location (i.e.,
2254 r0 for SImode, {r0, r1} for DImode), just as would be expected
2255 for an ordinary division routine. Because the AAPCS calling
2256 conventions specify that all of { r0, r1, r2, r3 } are
2257 caller-saved (call-clobbered) registers, there is no need to tell the compiler
2258 explicitly that those registers are clobbered by these
2259 routines. */
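/* For instance, per the Run-time ABI, __aeabi_idivmod called with r0 = 7 and
   r1 = 3 returns the quotient 2 in r0 and the remainder 1 in r1, while
   __aeabi_uldivmod returns the 64-bit quotient in {r0, r1} and the remainder
   in {r2, r3}.  */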
2260 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2261 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2263 /* For SImode division the ABI provides div-without-mod routines,
2264 which are faster. */
2265 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2266 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2268 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2269 divmod libcalls instead. */
2270 set_optab_libfunc (smod_optab, DImode, NULL);
2271 set_optab_libfunc (umod_optab, DImode, NULL);
2272 set_optab_libfunc (smod_optab, SImode, NULL);
2273 set_optab_libfunc (umod_optab, SImode, NULL);
2275 /* Half-precision float operations. The compiler handles all operations
2276 with NULL libfuncs by converting to SFmode. */
2277 switch (arm_fp16_format)
2279 case ARM_FP16_FORMAT_IEEE:
2280 case ARM_FP16_FORMAT_ALTERNATIVE:
2282 /* Conversions. */
2283 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2284 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2285 ? "__gnu_f2h_ieee"
2286 : "__gnu_f2h_alternative"));
2287 set_conv_libfunc (sext_optab, SFmode, HFmode,
2288 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2289 ? "__gnu_h2f_ieee"
2290 : "__gnu_h2f_alternative"));
2292 /* Arithmetic. */
2293 set_optab_libfunc (add_optab, HFmode, NULL);
2294 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2295 set_optab_libfunc (smul_optab, HFmode, NULL);
2296 set_optab_libfunc (neg_optab, HFmode, NULL);
2297 set_optab_libfunc (sub_optab, HFmode, NULL);
2299 /* Comparisons. */
2300 set_optab_libfunc (eq_optab, HFmode, NULL);
2301 set_optab_libfunc (ne_optab, HFmode, NULL);
2302 set_optab_libfunc (lt_optab, HFmode, NULL);
2303 set_optab_libfunc (le_optab, HFmode, NULL);
2304 set_optab_libfunc (ge_optab, HFmode, NULL);
2305 set_optab_libfunc (gt_optab, HFmode, NULL);
2306 set_optab_libfunc (unord_optab, HFmode, NULL);
2307 break;
2309 default:
2310 break;
2313 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2315 const arm_fixed_mode_set fixed_arith_modes[] =
2317 { QQmode, "qq" },
2318 { UQQmode, "uqq" },
2319 { HQmode, "hq" },
2320 { UHQmode, "uhq" },
2321 { SQmode, "sq" },
2322 { USQmode, "usq" },
2323 { DQmode, "dq" },
2324 { UDQmode, "udq" },
2325 { TQmode, "tq" },
2326 { UTQmode, "utq" },
2327 { HAmode, "ha" },
2328 { UHAmode, "uha" },
2329 { SAmode, "sa" },
2330 { USAmode, "usa" },
2331 { DAmode, "da" },
2332 { UDAmode, "uda" },
2333 { TAmode, "ta" },
2334 { UTAmode, "uta" }
2336 const arm_fixed_mode_set fixed_conv_modes[] =
2338 { QQmode, "qq" },
2339 { UQQmode, "uqq" },
2340 { HQmode, "hq" },
2341 { UHQmode, "uhq" },
2342 { SQmode, "sq" },
2343 { USQmode, "usq" },
2344 { DQmode, "dq" },
2345 { UDQmode, "udq" },
2346 { TQmode, "tq" },
2347 { UTQmode, "utq" },
2348 { HAmode, "ha" },
2349 { UHAmode, "uha" },
2350 { SAmode, "sa" },
2351 { USAmode, "usa" },
2352 { DAmode, "da" },
2353 { UDAmode, "uda" },
2354 { TAmode, "ta" },
2355 { UTAmode, "uta" },
2356 { QImode, "qi" },
2357 { HImode, "hi" },
2358 { SImode, "si" },
2359 { DImode, "di" },
2360 { TImode, "ti" },
2361 { SFmode, "sf" },
2362 { DFmode, "df" }
2364 unsigned int i, j;
2366 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2368 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2369 "add", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2371 "ssadd", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2373 "usadd", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2375 "sub", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2377 "sssub", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2379 "ussub", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2381 "mul", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2383 "ssmul", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2385 "usmul", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2387 "div", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2389 "udiv", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2391 "ssdiv", fixed_arith_modes[i].name, 3);
2392 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2393 "usdiv", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2395 "neg", fixed_arith_modes[i].name, 2);
2396 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2397 "ssneg", fixed_arith_modes[i].name, 2);
2398 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2399 "usneg", fixed_arith_modes[i].name, 2);
2400 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2401 "ashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2403 "ashr", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2405 "lshr", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2407 "ssashl", fixed_arith_modes[i].name, 3);
2408 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2409 "usashl", fixed_arith_modes[i].name, 3);
2410 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2411 "cmp", fixed_arith_modes[i].name, 2);
2414 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2415 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2417 if (i == j
2418 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2419 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2420 continue;
2422 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2423 fixed_conv_modes[j].mode, "fract",
2424 fixed_conv_modes[i].name,
2425 fixed_conv_modes[j].name);
2426 arm_set_fixed_conv_libfunc (satfract_optab,
2427 fixed_conv_modes[i].mode,
2428 fixed_conv_modes[j].mode, "satfract",
2429 fixed_conv_modes[i].name,
2430 fixed_conv_modes[j].name);
2431 arm_set_fixed_conv_libfunc (fractuns_optab,
2432 fixed_conv_modes[i].mode,
2433 fixed_conv_modes[j].mode, "fractuns",
2434 fixed_conv_modes[i].name,
2435 fixed_conv_modes[j].name);
2436 arm_set_fixed_conv_libfunc (satfractuns_optab,
2437 fixed_conv_modes[i].mode,
2438 fixed_conv_modes[j].mode, "satfractuns",
2439 fixed_conv_modes[i].name,
2440 fixed_conv_modes[j].name);
2444 if (TARGET_AAPCS_BASED)
2445 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2448 /* On AAPCS systems, this is the "struct __va_list". */
2449 static GTY(()) tree va_list_type;
2451 /* Return the type to use as __builtin_va_list. */
2452 static tree
2453 arm_build_builtin_va_list (void)
2455 tree va_list_name;
2456 tree ap_field;
2458 if (!TARGET_AAPCS_BASED)
2459 return std_build_builtin_va_list ();
2461 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2462 defined as:
2464 struct __va_list
2466 void *__ap;
2469 The C Library ABI further reinforces this definition in \S
2470 4.1.
2472 We must follow this definition exactly. The structure tag
2473 name is visible in C++ mangled names, and thus forms a part
2474 of the ABI. The field name may be used by people who
2475 #include <stdarg.h>. */
2476 /* Create the type. */
2477 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2478 /* Give it the required name. */
2479 va_list_name = build_decl (BUILTINS_LOCATION,
2480 TYPE_DECL,
2481 get_identifier ("__va_list"),
2482 va_list_type);
2483 DECL_ARTIFICIAL (va_list_name) = 1;
2484 TYPE_NAME (va_list_type) = va_list_name;
2485 TYPE_STUB_DECL (va_list_type) = va_list_name;
2486 /* Create the __ap field. */
2487 ap_field = build_decl (BUILTINS_LOCATION,
2488 FIELD_DECL,
2489 get_identifier ("__ap"),
2490 ptr_type_node);
2491 DECL_ARTIFICIAL (ap_field) = 1;
2492 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2493 TYPE_FIELDS (va_list_type) = ap_field;
2494 /* Compute its layout. */
2495 layout_type (va_list_type);
2497 return va_list_type;
2500 /* Return an expression of type "void *" pointing to the next
2501 available argument in a variable-argument list. VALIST is the
2502 user-level va_list object, of type __builtin_va_list. */
2503 static tree
2504 arm_extract_valist_ptr (tree valist)
2506 if (TREE_TYPE (valist) == error_mark_node)
2507 return error_mark_node;
2509 /* On an AAPCS target, the pointer is stored within "struct
2510 va_list". */
2511 if (TARGET_AAPCS_BASED)
2513 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2514 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2515 valist, ap_field, NULL_TREE);
2518 return valist;
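/* For an AAPCS va_list object AP, the COMPONENT_REF built above corresponds to
   the C expression AP.__ap; on other targets VALIST is returned unchanged.  */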
2521 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2522 static void
2523 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2525 valist = arm_extract_valist_ptr (valist);
2526 std_expand_builtin_va_start (valist, nextarg);
2529 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2530 static tree
2531 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2532 gimple_seq *post_p)
2534 valist = arm_extract_valist_ptr (valist);
2535 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2538 /* Fix up any incompatible options that the user has specified. */
2539 static void
2540 arm_option_override (void)
2542 if (global_options_set.x_arm_arch_option)
2543 arm_selected_arch = &all_architectures[arm_arch_option];
2545 if (global_options_set.x_arm_cpu_option)
2547 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2548 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2551 if (global_options_set.x_arm_tune_option)
2552 arm_selected_tune = &all_cores[(int) arm_tune_option];
2554 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2555 SUBTARGET_OVERRIDE_OPTIONS;
2556 #endif
2558 if (arm_selected_arch)
2560 if (arm_selected_cpu)
2562 /* Check for conflict between mcpu and march. */
2563 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2565 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2566 arm_selected_cpu->name, arm_selected_arch->name);
2567 /* -march wins for code generation.
2568 -mcpu wins for default tuning. */
2569 if (!arm_selected_tune)
2570 arm_selected_tune = arm_selected_cpu;
2572 arm_selected_cpu = arm_selected_arch;
2574 else
2575 /* -mcpu wins. */
2576 arm_selected_arch = NULL;
2578 else
2579 /* Pick a CPU based on the architecture. */
2580 arm_selected_cpu = arm_selected_arch;
2583 /* If the user did not specify a processor, choose one for them. */
2584 if (!arm_selected_cpu)
2586 const struct processors * sel;
2587 unsigned int sought;
2589 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2590 if (!arm_selected_cpu->name)
2592 #ifdef SUBTARGET_CPU_DEFAULT
2593 /* Use the subtarget default CPU if none was specified by
2594 configure. */
2595 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2596 #endif
2597 /* Default to ARM6. */
2598 if (!arm_selected_cpu->name)
2599 arm_selected_cpu = &all_cores[arm6];
2602 sel = arm_selected_cpu;
2603 insn_flags = sel->flags;
2605 /* Now check to see if the user has specified some command line
2606 switches that require certain abilities from the CPU. */
2607 sought = 0;
2609 if (TARGET_INTERWORK || TARGET_THUMB)
2611 sought |= (FL_THUMB | FL_MODE32);
2613 /* There are no ARM processors that support both APCS-26 and
2614 interworking. Therefore we force FL_MODE26 to be removed
2615 from insn_flags here (if it was set), so that the search
2616 below will always be able to find a compatible processor. */
2617 insn_flags &= ~FL_MODE26;
2620 if (sought != 0 && ((sought & insn_flags) != sought))
2622 /* Try to locate a CPU type that supports all of the abilities
2623 of the default CPU, plus the extra abilities requested by
2624 the user. */
2625 for (sel = all_cores; sel->name != NULL; sel++)
2626 if ((sel->flags & sought) == (sought | insn_flags))
2627 break;
2629 if (sel->name == NULL)
2631 unsigned current_bit_count = 0;
2632 const struct processors * best_fit = NULL;
2634 /* Ideally we would like to issue an error message here
2635 saying that it was not possible to find a CPU compatible
2636 with the default CPU, but which also supports the command
2637 line options specified by the programmer, and so they
2638 ought to use the -mcpu=<name> command line option to
2639 override the default CPU type.
2641 If we cannot find a cpu that has both the
2642 characteristics of the default cpu and the given
2643 command line options, we scan the array again looking
2644 for a best match. */
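/* For example, if two cores both provide every sought feature, the one whose
   flags share more bits with the default CPU's insn_flags (the larger
   bit_count below) is chosen as the best fit.  */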
2645 for (sel = all_cores; sel->name != NULL; sel++)
2646 if ((sel->flags & sought) == sought)
2648 unsigned count;
2650 count = bit_count (sel->flags & insn_flags);
2652 if (count >= current_bit_count)
2654 best_fit = sel;
2655 current_bit_count = count;
2659 gcc_assert (best_fit);
2660 sel = best_fit;
2663 arm_selected_cpu = sel;
2667 gcc_assert (arm_selected_cpu);
2668 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2669 if (!arm_selected_tune)
2670 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2672 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2673 insn_flags = arm_selected_cpu->flags;
2674 arm_base_arch = arm_selected_cpu->base_arch;
2676 arm_tune = arm_selected_tune->core;
2677 tune_flags = arm_selected_tune->flags;
2678 current_tune = arm_selected_tune->tune;
2680 /* Make sure that the processor choice does not conflict with any of the
2681 other command line choices. */
2682 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2683 error ("target CPU does not support ARM mode");
2685 /* BPABI targets use linker tricks to allow interworking on cores
2686 without thumb support. */
2687 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2689 warning (0, "target CPU does not support interworking" );
2690 target_flags &= ~MASK_INTERWORK;
2693 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2695 warning (0, "target CPU does not support THUMB instructions");
2696 target_flags &= ~MASK_THUMB;
2699 if (TARGET_APCS_FRAME && TARGET_THUMB)
2701 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2702 target_flags &= ~MASK_APCS_FRAME;
2705 /* Callee super interworking implies thumb interworking. Adding
2706 this to the flags here simplifies the logic elsewhere. */
2707 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2708 target_flags |= MASK_INTERWORK;
2710 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if called
2711 from here, where no function is currently being compiled. */
2712 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2713 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2715 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2716 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2718 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2720 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2721 target_flags |= MASK_APCS_FRAME;
2724 if (TARGET_POKE_FUNCTION_NAME)
2725 target_flags |= MASK_APCS_FRAME;
2727 if (TARGET_APCS_REENT && flag_pic)
2728 error ("-fpic and -mapcs-reent are incompatible");
2730 if (TARGET_APCS_REENT)
2731 warning (0, "APCS reentrant code not supported. Ignored");
2733 /* If this target is normally configured to use APCS frames, warn if they
2734 are turned off and debugging is turned on. */
2735 if (TARGET_ARM
2736 && write_symbols != NO_DEBUG
2737 && !TARGET_APCS_FRAME
2738 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2739 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2741 if (TARGET_APCS_FLOAT)
2742 warning (0, "passing floating point arguments in fp regs not yet supported");
2744 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2745 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2746 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2747 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2748 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2749 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2750 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2751 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2752 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2753 arm_arch6m = arm_arch6 && !arm_arch_notm;
2754 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2755 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2756 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2757 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2758 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2760 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2761 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2762 thumb_code = TARGET_ARM == 0;
2763 thumb1_code = TARGET_THUMB1 != 0;
2764 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2765 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2766 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2767 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2768 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2769 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2770 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2771 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2772 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2773 if (arm_restrict_it == 2)
2774 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2776 if (!TARGET_THUMB2)
2777 arm_restrict_it = 0;
2779 /* If we are not using the default (ARM mode) section anchor offset
2780 ranges, then set the correct ranges now. */
2781 if (TARGET_THUMB1)
2783 /* Thumb-1 LDR instructions cannot have negative offsets.
2784 Permissible positive offset ranges are 5-bit (for byte loads),
2785 6-bit (for halfword loads), or 7-bit (for word loads).
2786 Empirical results suggest a 7-bit anchor range gives the best
2787 overall code size. */
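/* As an illustration, the 5-bit immediates give byte offsets of 0-31 for LDRB,
   0-62 for LDRH (scaled by 2) and 0-124 for LDR (scaled by 4), hence the
   0..127 anchor range used below.  */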
2788 targetm.min_anchor_offset = 0;
2789 targetm.max_anchor_offset = 127;
2791 else if (TARGET_THUMB2)
2793 /* The minimum is set such that the total size of the block
2794 for a particular anchor is 248 + 1 + 4095 bytes, which is
2795 divisible by eight, ensuring natural spacing of anchors. */
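/* That is, 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */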
2796 targetm.min_anchor_offset = -248;
2797 targetm.max_anchor_offset = 4095;
2800 /* V5 code we generate is completely interworking capable, so we turn off
2801 TARGET_INTERWORK here to avoid many tests later on. */
2803 /* XXX However, we must pass the right pre-processor defines to CPP
2804 or GLD can get confused. This is a hack. */
2805 if (TARGET_INTERWORK)
2806 arm_cpp_interwork = 1;
2808 if (arm_arch5)
2809 target_flags &= ~MASK_INTERWORK;
2811 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2812 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2814 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2815 error ("iwmmxt abi requires an iwmmxt capable cpu");
2817 if (!global_options_set.x_arm_fpu_index)
2819 const char *target_fpu_name;
2820 bool ok;
2822 #ifdef FPUTYPE_DEFAULT
2823 target_fpu_name = FPUTYPE_DEFAULT;
2824 #else
2825 target_fpu_name = "vfp";
2826 #endif
2828 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2829 CL_TARGET);
2830 gcc_assert (ok);
2833 arm_fpu_desc = &all_fpus[arm_fpu_index];
2835 if (TARGET_NEON && !arm_arch7)
2836 error ("target CPU does not support NEON");
2838 switch (arm_fpu_desc->model)
2840 case ARM_FP_MODEL_VFP:
2841 arm_fpu_attr = FPU_VFP;
2842 break;
2844 default:
2845 gcc_unreachable();
2848 if (TARGET_AAPCS_BASED)
2850 if (TARGET_CALLER_INTERWORKING)
2851 error ("AAPCS does not support -mcaller-super-interworking");
2852 else
2853 if (TARGET_CALLEE_INTERWORKING)
2854 error ("AAPCS does not support -mcallee-super-interworking");
2857 /* iWMMXt and NEON are incompatible. */
2858 if (TARGET_IWMMXT && TARGET_NEON)
2859 error ("iWMMXt and NEON are incompatible");
2861 /* iWMMXt unsupported under Thumb mode. */
2862 if (TARGET_THUMB && TARGET_IWMMXT)
2863 error ("iWMMXt unsupported under Thumb mode");
2865 /* __fp16 support currently assumes the core has ldrh. */
2866 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2867 sorry ("__fp16 and no ldrh");
2869 /* If soft-float is specified then don't use FPU. */
2870 if (TARGET_SOFT_FLOAT)
2871 arm_fpu_attr = FPU_NONE;
2873 if (TARGET_AAPCS_BASED)
2875 if (arm_abi == ARM_ABI_IWMMXT)
2876 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2877 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2878 && TARGET_HARD_FLOAT
2879 && TARGET_VFP)
2880 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2881 else
2882 arm_pcs_default = ARM_PCS_AAPCS;
2884 else
2886 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2887 sorry ("-mfloat-abi=hard and VFP");
2889 if (arm_abi == ARM_ABI_APCS)
2890 arm_pcs_default = ARM_PCS_APCS;
2891 else
2892 arm_pcs_default = ARM_PCS_ATPCS;
2895 /* For arm2/3 there is no need to do any scheduling if we are doing
2896 software floating-point. */
2897 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2898 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2900 /* Use the cp15 method if it is available. */
2901 if (target_thread_pointer == TP_AUTO)
2903 if (arm_arch6k && !TARGET_THUMB1)
2904 target_thread_pointer = TP_CP15;
2905 else
2906 target_thread_pointer = TP_SOFT;
2909 if (TARGET_HARD_TP && TARGET_THUMB1)
2910 error ("can not use -mtp=cp15 with 16-bit Thumb");
2912 /* Override the default structure alignment for AAPCS ABI. */
2913 if (!global_options_set.x_arm_structure_size_boundary)
2915 if (TARGET_AAPCS_BASED)
2916 arm_structure_size_boundary = 8;
2918 else
2920 if (arm_structure_size_boundary != 8
2921 && arm_structure_size_boundary != 32
2922 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2924 if (ARM_DOUBLEWORD_ALIGN)
2925 warning (0,
2926 "structure size boundary can only be set to 8, 32 or 64");
2927 else
2928 warning (0, "structure size boundary can only be set to 8 or 32");
2929 arm_structure_size_boundary
2930 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2934 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2936 error ("RTP PIC is incompatible with Thumb");
2937 flag_pic = 0;
2940 /* If stack checking is disabled, we can use r10 as the PIC register,
2941 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2942 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2944 if (TARGET_VXWORKS_RTP)
2945 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2946 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2949 if (flag_pic && TARGET_VXWORKS_RTP)
2950 arm_pic_register = 9;
2952 if (arm_pic_register_string != NULL)
2954 int pic_register = decode_reg_name (arm_pic_register_string);
2956 if (!flag_pic)
2957 warning (0, "-mpic-register= is useless without -fpic");
2959 /* Prevent the user from choosing an obviously stupid PIC register. */
2960 else if (pic_register < 0 || call_used_regs[pic_register]
2961 || pic_register == HARD_FRAME_POINTER_REGNUM
2962 || pic_register == STACK_POINTER_REGNUM
2963 || pic_register >= PC_REGNUM
2964 || (TARGET_VXWORKS_RTP
2965 && (unsigned int) pic_register != arm_pic_register))
2966 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2967 else
2968 arm_pic_register = pic_register;
2971 if (TARGET_VXWORKS_RTP
2972 && !global_options_set.x_arm_pic_data_is_text_relative)
2973 arm_pic_data_is_text_relative = 0;
2975 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2976 if (fix_cm3_ldrd == 2)
2978 if (arm_selected_cpu->core == cortexm3)
2979 fix_cm3_ldrd = 1;
2980 else
2981 fix_cm3_ldrd = 0;
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors
2986 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2987 - ARMv8 architecture-based processors.
2989 Disable -munaligned-access by default for
2990 - all pre-ARMv6 architecture-based processors
2991 - ARMv6-M architecture-based processors. */
2993 if (unaligned_access == 2)
2995 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2996 unaligned_access = 1;
2997 else
2998 unaligned_access = 0;
3000 else if (unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3003 warning (0, "target CPU does not support unaligned accesses");
3004 unaligned_access = 0;
3007 if (TARGET_THUMB1 && flag_schedule_insns)
3009 /* Don't warn since it's on by default in -O2. */
3010 flag_schedule_insns = 0;
3013 if (optimize_size)
3015 /* If optimizing for size, bump the number of instructions that we
3016 are prepared to conditionally execute (even on a StrongARM). */
3017 max_insns_skipped = 6;
3019 /* For THUMB2, we limit the conditional sequence to one IT block. */
3020 if (TARGET_THUMB2)
3021 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3023 else
3024 max_insns_skipped = current_tune->max_insns_skipped;
3026 /* Hot/Cold partitioning is not currently supported, since we can't
3027 handle literal pool placement in that case. */
3028 if (flag_reorder_blocks_and_partition)
3030 inform (input_location,
3031 "-freorder-blocks-and-partition not supported on this architecture");
3032 flag_reorder_blocks_and_partition = 0;
3033 flag_reorder_blocks = 1;
3036 if (flag_pic)
3037 /* Hoisting PIC address calculations more aggressively provides a small,
3038 but measurable, size reduction for PIC code. Therefore, we decrease
3039 the bar for unrestricted expression hoisting to the cost of PIC address
3040 calculation, which is 2 instructions. */
3041 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3042 global_options.x_param_values,
3043 global_options_set.x_param_values);
3045 /* ARM EABI defaults to strict volatile bitfields. */
3046 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3047 && abi_version_at_least(2))
3048 flag_strict_volatile_bitfields = 1;
3050 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
3051 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3052 if (flag_prefetch_loop_arrays < 0
3053 && HAVE_prefetch
3054 && optimize >= 3
3055 && current_tune->num_prefetch_slots > 0)
3056 flag_prefetch_loop_arrays = 1;
3058 /* Set up parameters to be used in prefetching algorithm. Do not override the
3059 defaults unless we are tuning for a core we have researched values for. */
3060 if (current_tune->num_prefetch_slots > 0)
3061 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3062 current_tune->num_prefetch_slots,
3063 global_options.x_param_values,
3064 global_options_set.x_param_values);
3065 if (current_tune->l1_cache_line_size >= 0)
3066 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3067 current_tune->l1_cache_line_size,
3068 global_options.x_param_values,
3069 global_options_set.x_param_values);
3070 if (current_tune->l1_cache_size >= 0)
3071 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3072 current_tune->l1_cache_size,
3073 global_options.x_param_values,
3074 global_options_set.x_param_values);
3076 /* Use Neon to perform 64-bit operations rather than core
3077 registers. */
3078 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3079 if (use_neon_for_64bits == 1)
3080 prefer_neon_for_64bits = true;
3082 /* Use the alternative scheduling-pressure algorithm by default. */
3083 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3084 global_options.x_param_values,
3085 global_options_set.x_param_values);
3087 /* Disable shrink-wrap when optimizing function for size, since it tends to
3088 generate additional returns. */
3089 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3090 flag_shrink_wrap = false;
3091 /* TBD: Dwarf info for apcs frame is not handled yet. */
3092 if (TARGET_APCS_FRAME)
3093 flag_shrink_wrap = false;
3095 /* We only support -mslow-flash-data on armv7-m targets. */
3096 if (target_slow_flash_data
3097 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3098 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3099 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3101 /* Currently, for slow flash data, we just disable literal pools. */
3102 if (target_slow_flash_data)
3103 arm_disable_literal_pool = true;
3105 /* Thumb2 inline assembly code should always use unified syntax.
3106 This will apply to ARM and Thumb1 eventually. */
3107 if (TARGET_THUMB2)
3108 inline_asm_unified = 1;
3110 /* Disable scheduling fusion by default if the target is not an ARMv7
3111 processor or does not prefer ldrd/strd. */
3112 if (flag_schedule_fusion == 2
3113 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3114 flag_schedule_fusion = 0;
3116 /* Register global variables with the garbage collector. */
3117 arm_add_gc_roots ();
3120 static void
3121 arm_add_gc_roots (void)
3123 gcc_obstack_init(&minipool_obstack);
3124 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3127 /* A table of known ARM exception types.
3128 For use with the interrupt function attribute. */
3130 typedef struct
3132 const char *const arg;
3133 const unsigned long return_value;
3135 isr_attribute_arg;
3137 static const isr_attribute_arg isr_attribute_args [] =
3139 { "IRQ", ARM_FT_ISR },
3140 { "irq", ARM_FT_ISR },
3141 { "FIQ", ARM_FT_FIQ },
3142 { "fiq", ARM_FT_FIQ },
3143 { "ABORT", ARM_FT_ISR },
3144 { "abort", ARM_FT_ISR },
3145 { "ABORT", ARM_FT_ISR },
3146 { "abort", ARM_FT_ISR },
3147 { "UNDEF", ARM_FT_EXCEPTION },
3148 { "undef", ARM_FT_EXCEPTION },
3149 { "SWI", ARM_FT_EXCEPTION },
3150 { "swi", ARM_FT_EXCEPTION },
3151 { NULL, ARM_FT_NORMAL }
3154 /* Returns the (interrupt) function type of the current
3155 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3157 static unsigned long
3158 arm_isr_value (tree argument)
3160 const isr_attribute_arg * ptr;
3161 const char * arg;
3163 if (!arm_arch_notm)
3164 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3166 /* No argument - default to IRQ. */
3167 if (argument == NULL_TREE)
3168 return ARM_FT_ISR;
3170 /* Get the value of the argument. */
3171 if (TREE_VALUE (argument) == NULL_TREE
3172 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3173 return ARM_FT_UNKNOWN;
3175 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3177 /* Check it against the list of known arguments. */
3178 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3179 if (streq (arg, ptr->arg))
3180 return ptr->return_value;
3182 /* An unrecognized interrupt type. */
3183 return ARM_FT_UNKNOWN;
3186 /* Computes the type of the current function. */
3188 static unsigned long
3189 arm_compute_func_type (void)
3191 unsigned long type = ARM_FT_UNKNOWN;
3192 tree a;
3193 tree attr;
3195 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3197 /* Decide if the current function is volatile. Such functions
3198 never return, and many memory cycles can be saved by not storing
3199 register values that will never be needed again. This optimization
3200 was added to speed up context switching in a kernel application. */
3201 if (optimize > 0
3202 && (TREE_NOTHROW (current_function_decl)
3203 || !(flag_unwind_tables
3204 || (flag_exceptions
3205 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3206 && TREE_THIS_VOLATILE (current_function_decl))
3207 type |= ARM_FT_VOLATILE;
3209 if (cfun->static_chain_decl != NULL)
3210 type |= ARM_FT_NESTED;
3212 attr = DECL_ATTRIBUTES (current_function_decl);
3214 a = lookup_attribute ("naked", attr);
3215 if (a != NULL_TREE)
3216 type |= ARM_FT_NAKED;
3218 a = lookup_attribute ("isr", attr);
3219 if (a == NULL_TREE)
3220 a = lookup_attribute ("interrupt", attr);
3222 if (a == NULL_TREE)
3223 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3224 else
3225 type |= arm_isr_value (TREE_VALUE (a));
3227 return type;
3230 /* Returns the type of the current function. */
3232 unsigned long
3233 arm_current_func_type (void)
3235 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3236 cfun->machine->func_type = arm_compute_func_type ();
3238 return cfun->machine->func_type;
3241 bool
3242 arm_allocate_stack_slots_for_args (void)
3244 /* Naked functions should not allocate stack slots for arguments. */
3245 return !IS_NAKED (arm_current_func_type ());
3248 static bool
3249 arm_warn_func_return (tree decl)
3251 /* Naked functions are implemented entirely in assembly, including the
3252 return sequence, so suppress warnings about this. */
3253 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3257 /* Output assembler code for a block containing the constant parts
3258 of a trampoline, leaving space for the variable parts.
3260 On the ARM, (if r8 is the static chain regnum, and remembering that
3261 referencing pc adds an offset of 8) the trampoline looks like:
3262 ldr r8, [pc, #0]
3263 ldr pc, [pc]
3264 .word static chain value
3265 .word function's address
3266 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3268 static void
3269 arm_asm_trampoline_template (FILE *f)
3271 if (TARGET_ARM)
3273 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3274 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3276 else if (TARGET_THUMB2)
3278 /* The Thumb-2 trampoline is similar to the arm implementation.
3279 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3280 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3281 STATIC_CHAIN_REGNUM, PC_REGNUM);
3282 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3284 else
3286 ASM_OUTPUT_ALIGN (f, 2);
3287 fprintf (f, "\t.code\t16\n");
3288 fprintf (f, ".Ltrampoline_start:\n");
3289 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3290 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3291 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3292 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3293 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3294 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3296 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3297 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3300 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3302 static void
3303 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3305 rtx fnaddr, mem, a_tramp;
3307 emit_block_move (m_tramp, assemble_trampoline_template (),
3308 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3310 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3311 emit_move_insn (mem, chain_value);
3313 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3314 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3315 emit_move_insn (mem, fnaddr);
3317 a_tramp = XEXP (m_tramp, 0);
3318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3319 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3320 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3323 /* Thumb trampolines should be entered in thumb mode, so set
3324 the bottom bit of the address. */
3326 static rtx
3327 arm_trampoline_adjust_address (rtx addr)
3329 if (TARGET_THUMB)
3330 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3331 NULL, 0, OPTAB_LIB_WIDEN);
3332 return addr;
3335 /* Return 1 if it is possible to return using a single instruction.
3336 If SIBLING is non-null, this is a test for a return before a sibling
3337 call. SIBLING is the call insn, so we can examine its register usage. */
3340 use_return_insn (int iscond, rtx sibling)
3342 int regno;
3343 unsigned int func_type;
3344 unsigned long saved_int_regs;
3345 unsigned HOST_WIDE_INT stack_adjust;
3346 arm_stack_offsets *offsets;
3348 /* Never use a return instruction before reload has run. */
3349 if (!reload_completed)
3350 return 0;
3352 func_type = arm_current_func_type ();
3354 /* Naked, volatile and stack alignment functions need special
3355 consideration. */
3356 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3357 return 0;
3359 /* So do interrupt functions that use the frame pointer and Thumb
3360 interrupt functions. */
3361 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3362 return 0;
3364 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3365 && !optimize_function_for_size_p (cfun))
3366 return 0;
3368 offsets = arm_get_frame_offsets ();
3369 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3371 /* As do variadic functions. */
3372 if (crtl->args.pretend_args_size
3373 || cfun->machine->uses_anonymous_args
3374 /* Or if the function calls __builtin_eh_return () */
3375 || crtl->calls_eh_return
3376 /* Or if the function calls alloca */
3377 || cfun->calls_alloca
3378 /* Or if there is a stack adjustment. However, if the stack pointer
3379 is saved on the stack, we can use a pre-incrementing stack load. */
3380 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3381 && stack_adjust == 4)))
3382 return 0;
3384 saved_int_regs = offsets->saved_regs_mask;
3386 /* Unfortunately, the insn
3388 ldmib sp, {..., sp, ...}
3390 triggers a bug on most SA-110 based devices, such that the stack
3391 pointer won't be correctly restored if the instruction takes a
3392 page fault. We work around this problem by popping r3 along with
3393 the other registers, since that is never slower than executing
3394 another instruction.
3396 We test for !arm_arch5 here, because code for any architecture
3397 less than this could potentially be run on one of the buggy
3398 chips. */
3399 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3401 /* Validate that r3 is a call-clobbered register (always true in
3402 the default abi) ... */
3403 if (!call_used_regs[3])
3404 return 0;
3406 /* ... that it isn't being used for a return value ... */
3407 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3408 return 0;
3410 /* ... or for a tail-call argument ... */
3411 if (sibling)
3413 gcc_assert (CALL_P (sibling));
3415 if (find_regno_fusage (sibling, USE, 3))
3416 return 0;
3419 /* ... and that there are no call-saved registers in r0-r2
3420 (always true in the default ABI). */
3421 if (saved_int_regs & 0x7)
3422 return 0;
3425 /* Can't be done if interworking with Thumb, and any registers have been
3426 stacked. */
3427 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3428 return 0;
3430 /* On StrongARM, conditional returns are expensive if they aren't
3431 taken and multiple registers have been stacked. */
3432 if (iscond && arm_tune_strongarm)
3434 /* Conditional return when just the LR is stored is a simple
3435 conditional-load instruction, that's not expensive. */
3436 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3437 return 0;
3439 if (flag_pic
3440 && arm_pic_register != INVALID_REGNUM
3441 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3442 return 0;
3445 /* If there are saved registers but the LR isn't saved, then we need
3446 two instructions for the return. */
3447 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3448 return 0;
3450 /* Can't be done if any of the VFP regs are pushed,
3451 since this also requires an insn. */
3452 if (TARGET_HARD_FLOAT && TARGET_VFP)
3453 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3454 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3455 return 0;
3457 if (TARGET_REALLY_IWMMXT)
3458 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3459 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3460 return 0;
3462 return 1;
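/* Illustrative examples of the tests above: a small leaf function that
   saves no registers can typically return with a single "bx lr", and one
   that pushed {r4, lr} can still use a single "pop {r4, pc}"; but a
   function that calls alloca, or one left with a non-trivial stack
   adjustment at the return point, needs a full epilogue, so this routine
   returns 0 for it.  */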
3465 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3466 shrink-wrapping if possible. This is the case if we need to emit a
3467 prologue, which we can test by looking at the offsets. */
3468 bool
3469 use_simple_return_p (void)
3471 arm_stack_offsets *offsets;
3473 offsets = arm_get_frame_offsets ();
3474 return offsets->outgoing_args != 0;
3477 /* Return TRUE if int I is a valid immediate ARM constant. */
3480 const_ok_for_arm (HOST_WIDE_INT i)
3482 int lowbit;
3484 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3485 be all zero, or all one. */
3486 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3487 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3488 != ((~(unsigned HOST_WIDE_INT) 0)
3489 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3490 return FALSE;
3492 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3494 /* Fast return for 0 and small values. We must do this for zero, since
3495 the code below can't handle that one case. */
3496 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3497 return TRUE;
3499 /* Get the number of trailing zeros. */
3500 lowbit = ffs((int) i) - 1;
3502 /* Only even shifts are allowed in ARM mode so round down to the
3503 nearest even number. */
3504 if (TARGET_ARM)
3505 lowbit &= ~1;
3507 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3508 return TRUE;
3510 if (TARGET_ARM)
3512 /* Allow rotated constants in ARM mode. */
3513 if (lowbit <= 4
3514 && ((i & ~0xc000003f) == 0
3515 || (i & ~0xf000000f) == 0
3516 || (i & ~0xfc000003) == 0))
3517 return TRUE;
3519 else
3521 HOST_WIDE_INT v;
3523 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3524 v = i & 0xff;
3525 v |= v << 16;
3526 if (i == v || i == (v | (v << 8)))
3527 return TRUE;
3529 /* Allow repeated pattern 0xXY00XY00. */
3530 v = i & 0xff00;
3531 v |= v << 16;
3532 if (i == v)
3533 return TRUE;
3536 return FALSE;
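/* A minimal, self-contained sketch of the ARM-mode rule checked above,
   for illustration only (the name below is not used elsewhere): an
   immediate is valid when it is an 8-bit value rotated right by an even
   amount, so rotating the candidate left by every even count and testing
   whether it then fits in 8 bits is equivalent:

     static int is_arm_mode_immediate (unsigned int x)
     {
       int rot;
       for (rot = 0; rot < 32; rot += 2)
         {
           unsigned int v = rot ? (x << rot) | (x >> (32 - rot)) : x;
           if ((v & ~0xffu) == 0)
             return 1;
         }
       return 0;
     }

   Hence 0xff, 0x3fc and 0xff000000 are valid, while 0x101 and 0x1ff1 are
   not.  Thumb-2 additionally accepts the replicated byte patterns handled
   in the else-branch above.  */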
3539 /* Return true if I is a valid constant for the operation CODE. */
3541 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3543 if (const_ok_for_arm (i))
3544 return 1;
3546 switch (code)
3548 case SET:
3549 /* See if we can use movw. */
3550 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3551 return 1;
3552 else
3553 /* Otherwise, try mvn. */
3554 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3556 case PLUS:
3557 /* See if we can use addw or subw. */
3558 if (TARGET_THUMB2
3559 && ((i & 0xfffff000) == 0
3560 || ((-i) & 0xfffff000) == 0))
3561 return 1;
3562 /* else fall through. */
3564 case COMPARE:
3565 case EQ:
3566 case NE:
3567 case GT:
3568 case LE:
3569 case LT:
3570 case GE:
3571 case GEU:
3572 case LTU:
3573 case GTU:
3574 case LEU:
3575 case UNORDERED:
3576 case ORDERED:
3577 case UNEQ:
3578 case UNGE:
3579 case UNLT:
3580 case UNGT:
3581 case UNLE:
3582 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3584 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3585 case XOR:
3586 return 0;
3588 case IOR:
3589 if (TARGET_THUMB2)
3590 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3591 return 0;
3593 case AND:
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3596 default:
3597 gcc_unreachable ();
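/* Worked examples (illustrative) of the fall-backs above: for
   (set r0 #0xffffff3f) the constant itself is not encodable but its
   complement 0xc0 is, so "mvn r0, #0xc0" is accepted; for (plus r0 #-5)
   the negated value 5 is encodable, so "sub r0, r0, #5" works; and on
   Thumb-2 (plus r0 #0xabc) is accepted directly because addw/subw take a
   12-bit immediate.  */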
3601 /* Return true if I is a valid DImode constant for the operation CODE. */
3603 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3605 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3606 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3607 rtx hi = GEN_INT (hi_val);
3608 rtx lo = GEN_INT (lo_val);
3610 if (TARGET_THUMB1)
3611 return 0;
3613 switch (code)
3615 case AND:
3616 case IOR:
3617 case XOR:
3618 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3619 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3620 case PLUS:
3621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3623 default:
3624 return 0;
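/* For instance (illustrative), an IOR of a DImode value with
   0x000000ff000000ff is accepted: each 32-bit half is 0xff, a valid ARM
   immediate, so the operation splits into one ORR on the low word and one
   on the high word.  */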
3628 /* Emit a sequence of insns to handle a large constant.
3629 CODE is the code of the operation required; it can be any of SET, PLUS,
3630 IOR, AND, XOR, MINUS;
3631 MODE is the mode in which the operation is being performed;
3632 VAL is the integer to operate on;
3633 SOURCE is the other operand (a register, or a null-pointer for SET);
3634 SUBTARGETS means it is safe to create scratch registers if that will
3635 either produce a simpler sequence, or we will want to cse the values.
3636 Return value is the number of insns emitted. */
3638 /* ??? Tweak this for thumb2. */
3640 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3643 rtx cond;
3645 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3646 cond = COND_EXEC_TEST (PATTERN (insn));
3647 else
3648 cond = NULL_RTX;
3650 if (subtargets || code == SET
3651 || (REG_P (target) && REG_P (source)
3652 && REGNO (target) != REGNO (source)))
3654 /* After arm_reorg has been called, we can't fix up expensive
3655 constants by pushing them into memory so we must synthesize
3656 them in-line, regardless of the cost. This is only likely to
3657 be more costly on chips that have load delay slots and we are
3658 compiling without running the scheduler (so no splitting
3659 occurred before the final instruction emission).
3661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3663 if (!cfun->machine->after_arm_reorg
3664 && !cond
3665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3666 1, 0)
3667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3668 + (code != SET))))
3670 if (code == SET)
3672 /* Currently SET is the only monadic value for CODE; all
3673 the rest are dyadic. */
3674 if (TARGET_USE_MOVT)
3675 arm_emit_movpair (target, GEN_INT (val));
3676 else
3677 emit_set_insn (target, GEN_INT (val));
3679 return 1;
3681 else
3683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3685 if (TARGET_USE_MOVT)
3686 arm_emit_movpair (temp, GEN_INT (val));
3687 else
3688 emit_set_insn (temp, GEN_INT (val));
3690 /* For MINUS, the constant is the minuend (VAL - SOURCE), since we never
3691 have subtraction of a constant; that is canonicalized as addition. */
3692 if (code == MINUS)
3693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3694 else
3695 emit_set_insn (target,
3696 gen_rtx_fmt_ee (code, mode, source, temp));
3697 return 2;
3702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
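/* Worked example (illustrative): setting r0 to 0x12345678 with
   code == SET.  On a target with MOVW/MOVT the value is emitted as a
   movw/movt pair; otherwise the in-line synthesis would be a sequence
   along the lines of

     mov r0, #0x12000000
     orr r0, r0, #0x340000
     orr r0, r0, #0x5600
     orr r0, r0, #0x78

   four instructions built from 8-bit rotated immediates, which is why the
   cost check against arm_constant_limit may instead emit a plain set and
   let the constant be fixed up from the literal pool later.  */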
3706 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3707 ARM/THUMB2 immediates, and add up to VAL.
3708 The function return value gives the number of insns required. */
3709 static int
3710 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3711 struct four_ints *return_sequence)
3713 int best_consecutive_zeros = 0;
3714 int i;
3715 int best_start = 0;
3716 int insns1, insns2;
3717 struct four_ints tmp_sequence;
3719 /* If we aren't targeting ARM, the best place to start is always at
3720 the bottom, otherwise look more closely. */
3721 if (TARGET_ARM)
3723 for (i = 0; i < 32; i += 2)
3725 int consecutive_zeros = 0;
3727 if (!(val & (3 << i)))
3729 while ((i < 32) && !(val & (3 << i)))
3731 consecutive_zeros += 2;
3732 i += 2;
3734 if (consecutive_zeros > best_consecutive_zeros)
3736 best_consecutive_zeros = consecutive_zeros;
3737 best_start = i - consecutive_zeros;
3739 i -= 2;
3744 /* So long as it won't require any more insns to do so, it's
3745 desirable to emit a small constant (in bits 0...9) in the last
3746 insn. This way there is more chance that it can be combined with
3747 a later addressing insn to form a pre-indexed load or store
3748 operation. Consider:
3750 *((volatile int *)0xe0000100) = 1;
3751 *((volatile int *)0xe0000110) = 2;
3753 We want this to wind up as:
3755 mov rA, #0xe0000000
3756 mov rB, #1
3757 str rB, [rA, #0x100]
3758 mov rB, #2
3759 str rB, [rA, #0x110]
3761 rather than having to synthesize both large constants from scratch.
3763 Therefore, we calculate how many insns would be required to emit
3764 the constant starting from `best_start', and also starting from
3765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3766 yield a shorter sequence, we may as well use zero. */
3767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3768 if (best_start != 0
3769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3772 if (insns2 <= insns1)
3774 *return_sequence = tmp_sequence;
3775 insns1 = insns2;
3779 return insns1;
3782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3783 static int
3784 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3785 struct four_ints *return_sequence, int i)
3787 int remainder = val & 0xffffffff;
3788 int insns = 0;
3790 /* Try and find a way of doing the job in either two or three
3791 instructions.
3793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3794 location. We start at position I. This may be the MSB, or
3795 optimal_immediate_sequence may have positioned it at the largest block
3796 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3797 wrapping around to the top of the word when we drop off the bottom.
3798 In the worst case this code should produce no more than four insns.
3800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3801 constants, shifted to an arbitrary location. We should always start
3802 at the MSB. */
3805 int end;
3806 unsigned int b1, b2, b3, b4;
3807 unsigned HOST_WIDE_INT result;
3808 int loc;
3810 gcc_assert (insns < 4);
3812 if (i <= 0)
3813 i += 32;
3815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3818 loc = i;
3819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3820 /* We can use addw/subw for the last 12 bits. */
3821 result = remainder;
3822 else
3824 /* Use an 8-bit shifted/rotated immediate. */
3825 end = i - 8;
3826 if (end < 0)
3827 end += 32;
3828 result = remainder & ((0x0ff << end)
3829 | ((i < end) ? (0xff >> (32 - end))
3830 : 0));
3831 i -= 8;
3834 else
3836 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3837 arbitrary shifts. */
3838 i -= TARGET_ARM ? 2 : 1;
3839 continue;
3842 /* Next, see if we can do a better job with a thumb2 replicated
3843 constant.
3845 We do it this way around to catch the cases like 0x01F001E0 where
3846 two 8-bit immediates would work, but a replicated constant would
3847 make it worse.
3849 TODO: 16-bit constants that don't clear all the bits, but still win.
3850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3851 if (TARGET_THUMB2)
3853 b1 = (remainder & 0xff000000) >> 24;
3854 b2 = (remainder & 0x00ff0000) >> 16;
3855 b3 = (remainder & 0x0000ff00) >> 8;
3856 b4 = remainder & 0xff;
3858 if (loc > 24)
3860 /* The 8-bit immediate already found clears b1 (and maybe b2),
3861 but must leave b3 and b4 alone. */
3863 /* First try to find a 32-bit replicated constant that clears
3864 almost everything. We can assume that we can't do it in one,
3865 or else we wouldn't be here. */
3866 unsigned int tmp = b1 & b2 & b3 & b4;
3867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3868 + (tmp << 24);
3869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3870 + (tmp == b3) + (tmp == b4);
3871 if (tmp
3872 && (matching_bytes >= 3
3873 || (matching_bytes == 2
3874 && const_ok_for_op (remainder & ~tmp2, code))))
3876 /* At least 3 of the bytes match, and the fourth has at
3877 least as many bits set, or two of the bytes match
3878 and it will only require one more insn to finish. */
3879 result = tmp2;
3880 i = tmp != b1 ? 32
3881 : tmp != b2 ? 24
3882 : tmp != b3 ? 16
3883 : 8;
3886 /* Second, try to find a 16-bit replicated constant that can
3887 leave three of the bytes clear. If b2 or b4 is already
3888 zero, then we can. If the 8-bit from above would not
3889 clear b2 anyway, then we still win. */
3890 else if (b1 == b3 && (!b2 || !b4
3891 || (remainder & 0x00ff0000 & ~result)))
3893 result = remainder & 0xff00ff00;
3894 i = 24;
3897 else if (loc > 16)
3899 /* The 8-bit immediate already found clears b2 (and maybe b3)
3900 and we don't get here unless b1 is already clear, but it will
3901 leave b4 unchanged. */
3903 /* If we can clear b2 and b4 at once, then we win, since the
3904 8-bits couldn't possibly reach that far. */
3905 if (b2 == b4)
3907 result = remainder & 0x00ff00ff;
3908 i = 16;
3913 return_sequence->i[insns++] = result;
3914 remainder &= ~result;
3916 if (code == SET || code == MINUS)
3917 code = PLUS;
3919 while (remainder);
3921 return insns;
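/* Worked examples (illustrative): in ARM mode 0x0000ff00 is a single
   immediate, while 0xaaaaaaaa needs four, one per byte; in Thumb-2 mode
   0xaaaaaaaa is a single replicated-byte immediate and 0x00aa00aa a
   single half-word-replicated one, which is what the TARGET_THUMB2 branch
   above tries before settling for plain 8-bit pieces.  */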
3924 /* Emit an instruction with the indicated PATTERN. If COND is
3925 non-NULL, conditionalize the execution of the instruction on COND
3926 being true. */
3928 static void
3929 emit_constant_insn (rtx cond, rtx pattern)
3931 if (cond)
3932 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3933 emit_insn (pattern);
3936 /* As above, but extra parameter GENERATE which, if clear, suppresses
3937 RTL generation. */
3939 static int
3940 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3941 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3942 int generate)
3944 int can_invert = 0;
3945 int can_negate = 0;
3946 int final_invert = 0;
3947 int i;
3948 int set_sign_bit_copies = 0;
3949 int clear_sign_bit_copies = 0;
3950 int clear_zero_bit_copies = 0;
3951 int set_zero_bit_copies = 0;
3952 int insns = 0, neg_insns, inv_insns;
3953 unsigned HOST_WIDE_INT temp1, temp2;
3954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3955 struct four_ints *immediates;
3956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3958 /* Find out which operations are safe for a given CODE. Also do a quick
3959 check for degenerate cases; these can occur when DImode operations
3960 are split. */
3961 switch (code)
3963 case SET:
3964 can_invert = 1;
3965 break;
3967 case PLUS:
3968 can_negate = 1;
3969 break;
3971 case IOR:
3972 if (remainder == 0xffffffff)
3974 if (generate)
3975 emit_constant_insn (cond,
3976 gen_rtx_SET (VOIDmode, target,
3977 GEN_INT (ARM_SIGN_EXTEND (val))));
3978 return 1;
3981 if (remainder == 0)
3983 if (reload_completed && rtx_equal_p (target, source))
3984 return 0;
3986 if (generate)
3987 emit_constant_insn (cond,
3988 gen_rtx_SET (VOIDmode, target, source));
3989 return 1;
3991 break;
3993 case AND:
3994 if (remainder == 0)
3996 if (generate)
3997 emit_constant_insn (cond,
3998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3999 return 1;
4001 if (remainder == 0xffffffff)
4003 if (reload_completed && rtx_equal_p (target, source))
4004 return 0;
4005 if (generate)
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, target, source));
4008 return 1;
4010 can_invert = 1;
4011 break;
4013 case XOR:
4014 if (remainder == 0)
4016 if (reload_completed && rtx_equal_p (target, source))
4017 return 0;
4018 if (generate)
4019 emit_constant_insn (cond,
4020 gen_rtx_SET (VOIDmode, target, source));
4021 return 1;
4024 if (remainder == 0xffffffff)
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target,
4029 gen_rtx_NOT (mode, source)));
4030 return 1;
4032 final_invert = 1;
4033 break;
4035 case MINUS:
4036 /* We treat MINUS as (val - source), since (source - val) is always
4037 passed as (source + (-val)). */
4038 if (remainder == 0)
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 gen_rtx_NEG (mode, source)));
4044 return 1;
4046 if (const_ok_for_arm (val))
4048 if (generate)
4049 emit_constant_insn (cond,
4050 gen_rtx_SET (VOIDmode, target,
4051 gen_rtx_MINUS (mode, GEN_INT (val),
4052 source)));
4053 return 1;
4056 break;
4058 default:
4059 gcc_unreachable ();
4062 /* If we can do it in one insn get out quickly. */
4063 if (const_ok_for_op (val, code))
4065 if (generate)
4066 emit_constant_insn (cond,
4067 gen_rtx_SET (VOIDmode, target,
4068 (source
4069 ? gen_rtx_fmt_ee (code, mode, source,
4070 GEN_INT (val))
4071 : GEN_INT (val))));
4072 return 1;
4075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4076 insn. */
4077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4080 if (generate)
4082 if (mode == SImode && i == 16)
4083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4084 smaller insn. */
4085 emit_constant_insn (cond,
4086 gen_zero_extendhisi2
4087 (target, gen_lowpart (HImode, source)));
4088 else
4089 /* Extz only supports SImode, but we can coerce the operands
4090 into that mode. */
4091 emit_constant_insn (cond,
4092 gen_extzv_t2 (gen_lowpart (SImode, target),
4093 gen_lowpart (SImode, source),
4094 GEN_INT (i), const0_rtx));
4097 return 1;
4100 /* Calculate a few attributes that may be useful for specific
4101 optimizations. */
4102 /* Count number of leading zeros. */
4103 for (i = 31; i >= 0; i--)
4105 if ((remainder & (1 << i)) == 0)
4106 clear_sign_bit_copies++;
4107 else
4108 break;
4111 /* Count number of leading 1's. */
4112 for (i = 31; i >= 0; i--)
4114 if ((remainder & (1 << i)) != 0)
4115 set_sign_bit_copies++;
4116 else
4117 break;
4120 /* Count number of trailing zeros. */
4121 for (i = 0; i <= 31; i++)
4123 if ((remainder & (1 << i)) == 0)
4124 clear_zero_bit_copies++;
4125 else
4126 break;
4129 /* Count number of trailing 1's. */
4130 for (i = 0; i <= 31; i++)
4132 if ((remainder & (1 << i)) != 0)
4133 set_zero_bit_copies++;
4134 else
4135 break;
4138 switch (code)
4140 case SET:
4141 /* See if we can do this by sign_extending a constant that is known
4142 to be negative. This is a good way of doing it, since the shift
4143 may well merge into a subsequent insn. */
4144 if (set_sign_bit_copies > 1)
4146 if (const_ok_for_arm
4147 (temp1 = ARM_SIGN_EXTEND (remainder
4148 << (set_sign_bit_copies - 1))))
4150 if (generate)
4152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, new_src,
4155 GEN_INT (temp1)));
4156 emit_constant_insn (cond,
4157 gen_ashrsi3 (target, new_src,
4158 GEN_INT (set_sign_bit_copies - 1)));
4160 return 2;
4162 /* For an inverted constant, we will need to set the low bits,
4163 these will be shifted out of harm's way. */
4164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4165 if (const_ok_for_arm (~temp1))
4167 if (generate)
4169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4170 emit_constant_insn (cond,
4171 gen_rtx_SET (VOIDmode, new_src,
4172 GEN_INT (temp1)));
4173 emit_constant_insn (cond,
4174 gen_ashrsi3 (target, new_src,
4175 GEN_INT (set_sign_bit_copies - 1)));
4177 return 2;
4181 /* See if we can calculate the value as the difference between two
4182 valid immediates. */
4183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4185 int topshift = clear_sign_bit_copies & ~1;
4187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4188 & (0xff000000 >> topshift));
4190 /* If temp1 is zero, then that means the 9 most significant
4191 bits of remainder were 1 and we've caused it to overflow.
4192 When topshift is 0 we don't need to do anything since we
4193 can borrow from 'bit 32'. */
4194 if (temp1 == 0 && topshift != 0)
4195 temp1 = 0x80000000 >> (topshift - 1);
4197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4199 if (const_ok_for_arm (temp2))
4201 if (generate)
4203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (VOIDmode, new_src,
4206 GEN_INT (temp1)));
4207 emit_constant_insn (cond,
4208 gen_addsi3 (target, new_src,
4209 GEN_INT (-temp2)));
4212 return 2;
4216 /* See if we can generate this by setting the bottom (or the top)
4217 16 bits, and then shifting these into the other half of the
4218 word. We only look for the simplest cases, to do more would cost
4219 too much. Be careful, however, not to generate this when the
4220 alternative would take fewer insns. */
4221 if (val & 0xffff0000)
4223 temp1 = remainder & 0xffff0000;
4224 temp2 = remainder & 0x0000ffff;
4226 /* Overlaps outside this range are best done using other methods. */
4227 for (i = 9; i < 24; i++)
4229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4230 && !const_ok_for_arm (temp2))
4232 rtx new_src = (subtargets
4233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4234 : target);
4235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4236 source, subtargets, generate);
4237 source = new_src;
4238 if (generate)
4239 emit_constant_insn
4240 (cond,
4241 gen_rtx_SET
4242 (VOIDmode, target,
4243 gen_rtx_IOR (mode,
4244 gen_rtx_ASHIFT (mode, source,
4245 GEN_INT (i)),
4246 source)));
4247 return insns + 1;
4251 /* Don't duplicate cases already considered. */
4252 for (i = 17; i < 24; i++)
4254 if (((temp1 | (temp1 >> i)) == remainder)
4255 && !const_ok_for_arm (temp1))
4257 rtx new_src = (subtargets
4258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4259 : target);
4260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4261 source, subtargets, generate);
4262 source = new_src;
4263 if (generate)
4264 emit_constant_insn
4265 (cond,
4266 gen_rtx_SET (VOIDmode, target,
4267 gen_rtx_IOR
4268 (mode,
4269 gen_rtx_LSHIFTRT (mode, source,
4270 GEN_INT (i)),
4271 source)));
4272 return insns + 1;
4276 break;
4278 case IOR:
4279 case XOR:
4280 /* If we have IOR or XOR, and the constant can be loaded in a
4281 single instruction, and we can find a temporary to put it in,
4282 then this can be done in two instructions instead of 3-4. */
4283 if (subtargets
4284 /* TARGET can't be NULL if SUBTARGETS is 0 */
4285 || (reload_completed && !reg_mentioned_p (target, source)))
4287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4289 if (generate)
4291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4293 emit_constant_insn (cond,
4294 gen_rtx_SET (VOIDmode, sub,
4295 GEN_INT (val)));
4296 emit_constant_insn (cond,
4297 gen_rtx_SET (VOIDmode, target,
4298 gen_rtx_fmt_ee (code, mode,
4299 source, sub)));
4301 return 2;
4305 if (code == XOR)
4306 break;
4308 /* Convert.
4309 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4310 and the remainder 0s for e.g. 0xfff00000)
4311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4313 This can be done in 2 instructions by using shifts with mov or mvn.
4314 e.g. for
4315 x = x | 0xfff00000;
4316 we generate.
4317 mvn r0, r0, asl #12
4318 mvn r0, r0, lsr #12 */
4319 if (set_sign_bit_copies > 8
4320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4322 if (generate)
4324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4325 rtx shift = GEN_INT (set_sign_bit_copies);
4327 emit_constant_insn
4328 (cond,
4329 gen_rtx_SET (VOIDmode, sub,
4330 gen_rtx_NOT (mode,
4331 gen_rtx_ASHIFT (mode,
4332 source,
4333 shift))));
4334 emit_constant_insn
4335 (cond,
4336 gen_rtx_SET (VOIDmode, target,
4337 gen_rtx_NOT (mode,
4338 gen_rtx_LSHIFTRT (mode, sub,
4339 shift))));
4341 return 2;
4344 /* Convert
4345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4349 For example, r0 = r0 | 0xfff
4350 mvn r0, r0, lsr #12
4351 mvn r0, r0, asl #12
4354 if (set_zero_bit_copies > 8
4355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4357 if (generate)
4359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4360 rtx shift = GEN_INT (set_zero_bit_copies);
4362 emit_constant_insn
4363 (cond,
4364 gen_rtx_SET (VOIDmode, sub,
4365 gen_rtx_NOT (mode,
4366 gen_rtx_LSHIFTRT (mode,
4367 source,
4368 shift))));
4369 emit_constant_insn
4370 (cond,
4371 gen_rtx_SET (VOIDmode, target,
4372 gen_rtx_NOT (mode,
4373 gen_rtx_ASHIFT (mode, sub,
4374 shift))));
4376 return 2;
4379 /* This will never be reached for Thumb2 because orn is a valid
4380 instruction. This is for Thumb1 and the ARM 32 bit cases.
4382 x = y | constant (such that ~constant is a valid constant)
4383 Transform this to
4384 x = ~(~y & ~constant).
4386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4388 if (generate)
4390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4391 emit_constant_insn (cond,
4392 gen_rtx_SET (VOIDmode, sub,
4393 gen_rtx_NOT (mode, source)));
4394 source = sub;
4395 if (subtargets)
4396 sub = gen_reg_rtx (mode);
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_AND (mode, source,
4400 GEN_INT (temp1))));
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, target,
4403 gen_rtx_NOT (mode, sub)));
4405 return 3;
4407 break;
4409 case AND:
4410 /* See if two shifts will do 2 or more insn's worth of work. */
4411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4413 HOST_WIDE_INT shift_mask = ((0xffffffff
4414 << (32 - clear_sign_bit_copies))
4415 & 0xffffffff);
4417 if ((remainder | shift_mask) != 0xffffffff)
4419 if (generate)
4421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4422 insns = arm_gen_constant (AND, mode, cond,
4423 remainder | shift_mask,
4424 new_src, source, subtargets, 1);
4425 source = new_src;
4427 else
4429 rtx targ = subtargets ? NULL_RTX : target;
4430 insns = arm_gen_constant (AND, mode, cond,
4431 remainder | shift_mask,
4432 targ, source, subtargets, 0);
4436 if (generate)
4438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4439 rtx shift = GEN_INT (clear_sign_bit_copies);
4441 emit_insn (gen_ashlsi3 (new_src, source, shift));
4442 emit_insn (gen_lshrsi3 (target, new_src, shift));
4445 return insns + 2;
4448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4452 if ((remainder | shift_mask) != 0xffffffff)
4454 if (generate)
4456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4458 insns = arm_gen_constant (AND, mode, cond,
4459 remainder | shift_mask,
4460 new_src, source, subtargets, 1);
4461 source = new_src;
4463 else
4465 rtx targ = subtargets ? NULL_RTX : target;
4467 insns = arm_gen_constant (AND, mode, cond,
4468 remainder | shift_mask,
4469 targ, source, subtargets, 0);
4473 if (generate)
4475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4476 rtx shift = GEN_INT (clear_zero_bit_copies);
4478 emit_insn (gen_lshrsi3 (new_src, source, shift));
4479 emit_insn (gen_ashlsi3 (target, new_src, shift));
4482 return insns + 2;
4485 break;
4487 default:
4488 break;
4491 /* Calculate what the instruction sequences would be if we generated it
4492 normally, negated, or inverted. */
4493 if (code == AND)
4494 /* AND cannot be split into multiple insns, so invert and use BIC. */
4495 insns = 99;
4496 else
4497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4499 if (can_negate)
4500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4501 &neg_immediates);
4502 else
4503 neg_insns = 99;
4505 if (can_invert || final_invert)
4506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4507 &inv_immediates);
4508 else
4509 inv_insns = 99;
4511 immediates = &pos_immediates;
4513 /* Is the negated immediate sequence more efficient? */
4514 if (neg_insns < insns && neg_insns <= inv_insns)
4516 insns = neg_insns;
4517 immediates = &neg_immediates;
4519 else
4520 can_negate = 0;
4522 /* Is the inverted immediate sequence more efficient?
4523 We must allow for an extra NOT instruction for XOR operations, although
4524 there is some chance that the final 'mvn' will get optimized later. */
4525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4527 insns = inv_insns;
4528 immediates = &inv_immediates;
4530 else
4532 can_invert = 0;
4533 final_invert = 0;
4536 /* Now output the chosen sequence as instructions. */
4537 if (generate)
4539 for (i = 0; i < insns; i++)
4541 rtx new_src, temp1_rtx;
4543 temp1 = immediates->i[i];
4545 if (code == SET || code == MINUS)
4546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4547 else if ((final_invert || i < (insns - 1)) && subtargets)
4548 new_src = gen_reg_rtx (mode);
4549 else
4550 new_src = target;
4552 if (can_invert)
4553 temp1 = ~temp1;
4554 else if (can_negate)
4555 temp1 = -temp1;
4557 temp1 = trunc_int_for_mode (temp1, mode);
4558 temp1_rtx = GEN_INT (temp1);
4560 if (code == SET)
4562 else if (code == MINUS)
4563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4564 else
4565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4567 emit_constant_insn (cond,
4568 gen_rtx_SET (VOIDmode, new_src,
4569 temp1_rtx));
4570 source = new_src;
4572 if (code == SET)
4574 can_negate = can_invert;
4575 can_invert = 0;
4576 code = PLUS;
4578 else if (code == MINUS)
4579 code = PLUS;
4583 if (final_invert)
4585 if (generate)
4586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4587 gen_rtx_NOT (mode, source)));
4588 insns++;
4591 return insns;
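/* Worked example (illustrative): for r0 &= 0xfffc00f3 the positive
   sequence is rejected (AND immediates cannot be chained), but the
   complement 0x0003ff0c splits into the two rotated immediates 0x3fc00
   and 0x30c, so the inverted sequence wins and something like

     bic r0, r0, #0x3fc00
     bic r0, r0, #0x30c

   is emitted: two instructions instead of a constant-pool load.  */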
4594 /* Canonicalize a comparison so that we are more likely to recognize it.
4595 This can be done for a few constant compares, where we can make the
4596 immediate value easier to load. */
4598 static void
4599 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4600 bool op0_preserve_value)
4602 machine_mode mode;
4603 unsigned HOST_WIDE_INT i, maxval;
4605 mode = GET_MODE (*op0);
4606 if (mode == VOIDmode)
4607 mode = GET_MODE (*op1);
4609 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4614 for GTU/LEU in Thumb mode. */
4615 if (mode == DImode)
4617 rtx tem;
4619 if (*code == GT || *code == LE
4620 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4622 /* Missing comparison. First try to use an available
4623 comparison. */
4624 if (CONST_INT_P (*op1))
4626 i = INTVAL (*op1);
4627 switch (*code)
4629 case GT:
4630 case LE:
4631 if (i != maxval
4632 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4634 *op1 = GEN_INT (i + 1);
4635 *code = *code == GT ? GE : LT;
4636 return;
4638 break;
4639 case GTU:
4640 case LEU:
4641 if (i != ~((unsigned HOST_WIDE_INT) 0)
4642 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4644 *op1 = GEN_INT (i + 1);
4645 *code = *code == GTU ? GEU : LTU;
4646 return;
4648 break;
4649 default:
4650 gcc_unreachable ();
4654 /* If that did not work, reverse the condition. */
4655 if (!op0_preserve_value)
4657 tem = *op0;
4658 *op0 = *op1;
4659 *op1 = tem;
4660 *code = (int)swap_condition ((enum rtx_code)*code);
4663 return;
4666 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4667 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4668 to facilitate possible combining with a cmp into 'ands'. */
4669 if (mode == SImode
4670 && GET_CODE (*op0) == ZERO_EXTEND
4671 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4672 && GET_MODE (XEXP (*op0, 0)) == QImode
4673 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4674 && subreg_lowpart_p (XEXP (*op0, 0))
4675 && *op1 == const0_rtx)
4676 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4677 GEN_INT (255));
4679 /* Comparisons smaller than DImode. Only adjust comparisons against
4680 an out-of-range constant. */
4681 if (!CONST_INT_P (*op1)
4682 || const_ok_for_arm (INTVAL (*op1))
4683 || const_ok_for_arm (- INTVAL (*op1)))
4684 return;
4686 i = INTVAL (*op1);
4688 switch (*code)
4690 case EQ:
4691 case NE:
4692 return;
4694 case GT:
4695 case LE:
4696 if (i != maxval
4697 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4699 *op1 = GEN_INT (i + 1);
4700 *code = *code == GT ? GE : LT;
4701 return;
4703 break;
4705 case GE:
4706 case LT:
4707 if (i != ~maxval
4708 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4710 *op1 = GEN_INT (i - 1);
4711 *code = *code == GE ? GT : LE;
4712 return;
4714 break;
4716 case GTU:
4717 case LEU:
4718 if (i != ~((unsigned HOST_WIDE_INT) 0)
4719 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4721 *op1 = GEN_INT (i + 1);
4722 *code = *code == GTU ? GEU : LTU;
4723 return;
4725 break;
4727 case GEU:
4728 case LTU:
4729 if (i != 0
4730 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4732 *op1 = GEN_INT (i - 1);
4733 *code = *code == GEU ? GTU : LEU;
4734 return;
4736 break;
4738 default:
4739 gcc_unreachable ();
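/* Worked example (illustrative): (gt r0 #0x1ff) cannot be emitted
   directly because neither 0x1ff nor -0x1ff is a valid immediate, but
   0x200 is, so the comparison is rewritten as (ge r0 #0x200); the
   condition code changes while the arithmetic meaning is preserved.  */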
4744 /* Define how to find the value returned by a function. */
4746 static rtx
4747 arm_function_value(const_tree type, const_tree func,
4748 bool outgoing ATTRIBUTE_UNUSED)
4750 machine_mode mode;
4751 int unsignedp ATTRIBUTE_UNUSED;
4752 rtx r ATTRIBUTE_UNUSED;
4754 mode = TYPE_MODE (type);
4756 if (TARGET_AAPCS_BASED)
4757 return aapcs_allocate_return_reg (mode, type, func);
4759 /* Promote integer types. */
4760 if (INTEGRAL_TYPE_P (type))
4761 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4763 /* Promote small structs returned in a register to full-word size
4764 for big-endian AAPCS. */
4765 if (arm_return_in_msb (type))
4767 HOST_WIDE_INT size = int_size_in_bytes (type);
4768 if (size % UNITS_PER_WORD != 0)
4770 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4771 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4775 return arm_libcall_value_1 (mode);
4778 /* libcall hashtable helpers. */
4780 struct libcall_hasher : typed_noop_remove <rtx_def>
4782 typedef rtx_def value_type;
4783 typedef rtx_def compare_type;
4784 static inline hashval_t hash (const value_type *);
4785 static inline bool equal (const value_type *, const compare_type *);
4786 static inline void remove (value_type *);
4789 inline bool
4790 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4792 return rtx_equal_p (p1, p2);
4795 inline hashval_t
4796 libcall_hasher::hash (const value_type *p1)
4798 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4801 typedef hash_table<libcall_hasher> libcall_table_type;
4803 static void
4804 add_libcall (libcall_table_type *htab, rtx libcall)
4806 *htab->find_slot (libcall, INSERT) = libcall;
4809 static bool
4810 arm_libcall_uses_aapcs_base (const_rtx libcall)
4812 static bool init_done = false;
4813 static libcall_table_type *libcall_htab = NULL;
4815 if (!init_done)
4817 init_done = true;
4819 libcall_htab = new libcall_table_type (31);
4820 add_libcall (libcall_htab,
4821 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4822 add_libcall (libcall_htab,
4823 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4824 add_libcall (libcall_htab,
4825 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4826 add_libcall (libcall_htab,
4827 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4829 add_libcall (libcall_htab,
4830 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4831 add_libcall (libcall_htab,
4832 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4833 add_libcall (libcall_htab,
4834 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4835 add_libcall (libcall_htab,
4836 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4838 add_libcall (libcall_htab,
4839 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4840 add_libcall (libcall_htab,
4841 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4842 add_libcall (libcall_htab,
4843 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4844 add_libcall (libcall_htab,
4845 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4846 add_libcall (libcall_htab,
4847 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4848 add_libcall (libcall_htab,
4849 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4850 add_libcall (libcall_htab,
4851 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4852 add_libcall (libcall_htab,
4853 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4855 /* Values from double-precision helper functions are returned in core
4856 registers if the selected core only supports single-precision
4857 arithmetic, even if we are using the hard-float ABI. The same is
4858 true for single-precision helpers, but we will never be using the
4859 hard-float ABI on a CPU which doesn't support single-precision
4860 operations in hardware. */
4861 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4872 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4873 SFmode));
4874 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4875 DFmode));
4878 return libcall && libcall_htab->find (libcall) != NULL;
4881 static rtx
4882 arm_libcall_value_1 (machine_mode mode)
4884 if (TARGET_AAPCS_BASED)
4885 return aapcs_libcall_value (mode);
4886 else if (TARGET_IWMMXT_ABI
4887 && arm_vector_mode_supported_p (mode))
4888 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4889 else
4890 return gen_rtx_REG (mode, ARG_REGISTER (1));
4893 /* Define how to find the value returned by a library function
4894 assuming the value has mode MODE. */
4896 static rtx
4897 arm_libcall_value (machine_mode mode, const_rtx libcall)
4899 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4900 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4902 /* The following libcalls return their result in integer registers,
4903 even though they return a floating point value. */
4904 if (arm_libcall_uses_aapcs_base (libcall))
4905 return gen_rtx_REG (mode, ARG_REGISTER(1));
4909 return arm_libcall_value_1 (mode);
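/* For example (illustrative): on a hard-float target whose FPU is
   single-precision only, a DFmode divide goes through the __aeabi_ddiv
   helper, which follows the base AAPCS, so its result is picked up from
   r0/r1 rather than d0; that is precisely the case the table in
   arm_libcall_uses_aapcs_base describes.  */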
4912 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4914 static bool
4915 arm_function_value_regno_p (const unsigned int regno)
4917 if (regno == ARG_REGISTER (1)
4918 || (TARGET_32BIT
4919 && TARGET_AAPCS_BASED
4920 && TARGET_VFP
4921 && TARGET_HARD_FLOAT
4922 && regno == FIRST_VFP_REGNUM)
4923 || (TARGET_IWMMXT_ABI
4924 && regno == FIRST_IWMMXT_REGNUM))
4925 return true;
4927 return false;
4930 /* Determine the amount of memory needed to store the possible return
4931 registers of an untyped call. */
4933 arm_apply_result_size (void)
4935 int size = 16;
4937 if (TARGET_32BIT)
4939 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4940 size += 32;
4941 if (TARGET_IWMMXT_ABI)
4942 size += 8;
4945 return size;
4948 /* Decide whether TYPE should be returned in memory (true)
4949 or in a register (false). FNTYPE is the type of the function making
4950 the call. */
4951 static bool
4952 arm_return_in_memory (const_tree type, const_tree fntype)
4954 HOST_WIDE_INT size;
4956 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4958 if (TARGET_AAPCS_BASED)
4960 /* Simple, non-aggregate types (i.e. not including vectors and
4961 complex) are always returned in a register (or registers).
4962 We don't care about which register here, so we can short-cut
4963 some of the detail. */
4964 if (!AGGREGATE_TYPE_P (type)
4965 && TREE_CODE (type) != VECTOR_TYPE
4966 && TREE_CODE (type) != COMPLEX_TYPE)
4967 return false;
4969 /* Any return value that is no larger than one word can be
4970 returned in r0. */
4971 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4972 return false;
4974 /* Check any available co-processors to see if they accept the
4975 type as a register candidate (VFP, for example, can return
4976 some aggregates in consecutive registers). These aren't
4977 available if the call is variadic. */
4978 if (aapcs_select_return_coproc (type, fntype) >= 0)
4979 return false;
4981 /* Vector values should be returned using ARM registers, not
4982 memory (unless they're over 16 bytes, which will break since
4983 we only have four call-clobbered registers to play with). */
4984 if (TREE_CODE (type) == VECTOR_TYPE)
4985 return (size < 0 || size > (4 * UNITS_PER_WORD));
4987 /* The rest go in memory. */
4988 return true;
4991 if (TREE_CODE (type) == VECTOR_TYPE)
4992 return (size < 0 || size > (4 * UNITS_PER_WORD));
4994 if (!AGGREGATE_TYPE_P (type) &&
4995 (TREE_CODE (type) != VECTOR_TYPE))
4996 /* All simple types are returned in registers. */
4997 return false;
4999 if (arm_abi != ARM_ABI_APCS)
5001 /* ATPCS and later return aggregate types in memory only if they are
5002 larger than a word (or are variable size). */
5003 return (size < 0 || size > UNITS_PER_WORD);
5006 /* For the arm-wince targets we choose to be compatible with Microsoft's
5007 ARM and Thumb compilers, which always return aggregates in memory. */
5008 #ifndef ARM_WINCE
5009 /* All structures/unions bigger than one word are returned in memory.
5010 Also catch the case where int_size_in_bytes returns -1. In this case
5011 the aggregate is either huge or of variable size, and in either case
5012 we will want to return it via memory and not in a register. */
5013 if (size < 0 || size > UNITS_PER_WORD)
5014 return true;
5016 if (TREE_CODE (type) == RECORD_TYPE)
5018 tree field;
5020 /* For a struct the APCS says that we only return in a register
5021 if the type is 'integer like' and every addressable element
5022 has an offset of zero. For practical purposes this means
5023 that the structure can have at most one non bit-field element
5024 and that this element must be the first one in the structure. */
5026 /* Find the first field, ignoring non FIELD_DECL things which will
5027 have been created by C++. */
5028 for (field = TYPE_FIELDS (type);
5029 field && TREE_CODE (field) != FIELD_DECL;
5030 field = DECL_CHAIN (field))
5031 continue;
5033 if (field == NULL)
5034 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5036 /* Check that the first field is valid for returning in a register. */
5038 /* ... Floats are not allowed */
5039 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5040 return true;
5042 /* ... Aggregates that are not themselves valid for returning in
5043 a register are not allowed. */
5044 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5045 return true;
5047 /* Now check the remaining fields, if any. Only bitfields are allowed,
5048 since they are not addressable. */
5049 for (field = DECL_CHAIN (field);
5050 field;
5051 field = DECL_CHAIN (field))
5053 if (TREE_CODE (field) != FIELD_DECL)
5054 continue;
5056 if (!DECL_BIT_FIELD_TYPE (field))
5057 return true;
5060 return false;
5063 if (TREE_CODE (type) == UNION_TYPE)
5065 tree field;
5067 /* Unions can be returned in registers if every element is
5068 integral, or can be returned in an integer register. */
5069 for (field = TYPE_FIELDS (type);
5070 field;
5071 field = DECL_CHAIN (field))
5073 if (TREE_CODE (field) != FIELD_DECL)
5074 continue;
5076 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5077 return true;
5079 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5080 return true;
5083 return false;
5085 #endif /* not ARM_WINCE */
5087 /* Return all other types in memory. */
5088 return true;
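/* Illustrative AAPCS examples of the rules above:

     struct s4 { short a, b; };   -> 4 bytes, fits in r0, not in memory
     struct s8 { int a, b; };     -> 8 bytes, no co-processor takes it,
                                     so it is returned in memory
     struct hfa { double d[2]; }; -> returned in d0-d1 when the VFP
                                     hard-float PCS applies  */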
5091 const struct pcs_attribute_arg
5093 const char *arg;
5094 enum arm_pcs value;
5095 } pcs_attribute_args[] =
5097 {"aapcs", ARM_PCS_AAPCS},
5098 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5099 #if 0
5100 /* We could recognize these, but changes would be needed elsewhere
5101 * to implement them. */
5102 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5103 {"atpcs", ARM_PCS_ATPCS},
5104 {"apcs", ARM_PCS_APCS},
5105 #endif
5106 {NULL, ARM_PCS_UNKNOWN}
5109 static enum arm_pcs
5110 arm_pcs_from_attribute (tree attr)
5112 const struct pcs_attribute_arg *ptr;
5113 const char *arg;
5115 /* Get the value of the argument. */
5116 if (TREE_VALUE (attr) == NULL_TREE
5117 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5118 return ARM_PCS_UNKNOWN;
5120 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5122 /* Check it against the list of known arguments. */
5123 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5124 if (streq (arg, ptr->arg))
5125 return ptr->value;
5127 /* An unrecognized PCS variant. */
5128 return ARM_PCS_UNKNOWN;
5131 /* Get the PCS variant to use for this call. TYPE is the function's type
5132 specification, DECL is the specific declaration. DECL may be null if
5133 the call could be indirect or if this is a library call. */
5134 static enum arm_pcs
5135 arm_get_pcs_model (const_tree type, const_tree decl)
5137 bool user_convention = false;
5138 enum arm_pcs user_pcs = arm_pcs_default;
5139 tree attr;
5141 gcc_assert (type);
5143 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5144 if (attr)
5146 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5147 user_convention = true;
5150 if (TARGET_AAPCS_BASED)
5152 /* Detect varargs functions. These always use the base rules
5153 (no argument is ever a candidate for a co-processor
5154 register). */
5155 bool base_rules = stdarg_p (type);
5157 if (user_convention)
5159 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5160 sorry ("non-AAPCS derived PCS variant");
5161 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5162 error ("variadic functions must use the base AAPCS variant");
5165 if (base_rules)
5166 return ARM_PCS_AAPCS;
5167 else if (user_convention)
5168 return user_pcs;
5169 else if (decl && flag_unit_at_a_time)
5171 /* Local functions never leak outside this compilation unit,
5172 so we are free to use whatever conventions are
5173 appropriate. */
5174 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5175 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5176 if (i && i->local)
5177 return ARM_PCS_AAPCS_LOCAL;
5180 else if (user_convention && user_pcs != arm_pcs_default)
5181 sorry ("PCS variant");
5183 /* For everything else we use the target's default. */
5184 return arm_pcs_default;
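/* For example (illustrative), with the softfp float ABI a specific
   function can still be declared to use the VFP variant:

     double dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));

   calls to it then use ARM_PCS_AAPCS_VFP, whereas a variadic function
   carrying the same attribute is rejected with the error above.  */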
5188 static void
5189 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5190 const_tree fntype ATTRIBUTE_UNUSED,
5191 rtx libcall ATTRIBUTE_UNUSED,
5192 const_tree fndecl ATTRIBUTE_UNUSED)
5194 /* Record the unallocated VFP registers. */
5195 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5196 pcum->aapcs_vfp_reg_alloc = 0;
5199 /* Walk down the type tree of TYPE counting consecutive base elements.
5200 If *MODEP is VOIDmode, then set it to the first valid floating point
5201 type. If a non-floating point type is found, or if a floating point
5202 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5203 otherwise return the count in the sub-tree. */
5204 static int
5205 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5207 machine_mode mode;
5208 HOST_WIDE_INT size;
5210 switch (TREE_CODE (type))
5212 case REAL_TYPE:
5213 mode = TYPE_MODE (type);
5214 if (mode != DFmode && mode != SFmode)
5215 return -1;
5217 if (*modep == VOIDmode)
5218 *modep = mode;
5220 if (*modep == mode)
5221 return 1;
5223 break;
5225 case COMPLEX_TYPE:
5226 mode = TYPE_MODE (TREE_TYPE (type));
5227 if (mode != DFmode && mode != SFmode)
5228 return -1;
5230 if (*modep == VOIDmode)
5231 *modep = mode;
5233 if (*modep == mode)
5234 return 2;
5236 break;
5238 case VECTOR_TYPE:
5239 /* Use V2SImode and V4SImode as representatives of all 64-bit
5240 and 128-bit vector types, whether or not those modes are
5241 supported with the present options. */
5242 size = int_size_in_bytes (type);
5243 switch (size)
5245 case 8:
5246 mode = V2SImode;
5247 break;
5248 case 16:
5249 mode = V4SImode;
5250 break;
5251 default:
5252 return -1;
5255 if (*modep == VOIDmode)
5256 *modep = mode;
5258 /* Vector modes are considered to be opaque: two vectors are
5259 equivalent for the purposes of being homogeneous aggregates
5260 if they are the same size. */
5261 if (*modep == mode)
5262 return 1;
5264 break;
5266 case ARRAY_TYPE:
5268 int count;
5269 tree index = TYPE_DOMAIN (type);
5271 /* Can't handle incomplete types nor sizes that are not
5272 fixed. */
5273 if (!COMPLETE_TYPE_P (type)
5274 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5275 return -1;
5277 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5278 if (count == -1
5279 || !index
5280 || !TYPE_MAX_VALUE (index)
5281 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5282 || !TYPE_MIN_VALUE (index)
5283 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5284 || count < 0)
5285 return -1;
5287 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5288 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5290 /* There must be no padding. */
5291 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5292 return -1;
5294 return count;
5297 case RECORD_TYPE:
5299 int count = 0;
5300 int sub_count;
5301 tree field;
5303 /* Can't handle incomplete types nor sizes that are not
5304 fixed. */
5305 if (!COMPLETE_TYPE_P (type)
5306 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5307 return -1;
5309 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5311 if (TREE_CODE (field) != FIELD_DECL)
5312 continue;
5314 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5315 if (sub_count < 0)
5316 return -1;
5317 count += sub_count;
5320 /* There must be no padding. */
5321 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5322 return -1;
5324 return count;
5327 case UNION_TYPE:
5328 case QUAL_UNION_TYPE:
5330 /* These aren't very interesting except in a degenerate case. */
5331 int count = 0;
5332 int sub_count;
5333 tree field;
5335 /* Can't handle incomplete types nor sizes that are not
5336 fixed. */
5337 if (!COMPLETE_TYPE_P (type)
5338 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5339 return -1;
5341 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5343 if (TREE_CODE (field) != FIELD_DECL)
5344 continue;
5346 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5347 if (sub_count < 0)
5348 return -1;
5349 count = count > sub_count ? count : sub_count;
5352 /* There must be no padding. */
5353 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5354 return -1;
5356 return count;
5359 default:
5360 break;
5363 return -1;
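/* Worked examples (illustrative), starting with *MODEP == VOIDmode:

     struct hfa { double x; double y[2]; };  -> returns 3, *MODEP = DFmode
     struct mix { float f; int i; };         -> returns -1 (not homogeneous)

   so the first type is a homogeneous floating-point aggregate eligible
   for d0-d2 under the VFP PCS, while the second falls back to the base
   rules.  */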
5366 /* Return true if PCS_VARIANT should use VFP registers. */
5367 static bool
5368 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5370 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5372 static bool seen_thumb1_vfp = false;
5374 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5376 sorry ("Thumb-1 hard-float VFP ABI");
5377 /* sorry() is not immediately fatal, so only display this once. */
5378 seen_thumb1_vfp = true;
5381 return true;
5384 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5385 return false;
5387 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5388 (TARGET_VFP_DOUBLE || !is_double));
5391 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5392 suitable for passing or returning in VFP registers for the PCS
5393 variant selected. If it is, then *BASE_MODE is updated to contain
5394 a machine mode describing each element of the argument's type and
5395 *COUNT to hold the number of such elements. */
5396 static bool
5397 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5398 machine_mode mode, const_tree type,
5399 machine_mode *base_mode, int *count)
5401 machine_mode new_mode = VOIDmode;
5403 /* If we have the type information, prefer that to working things
5404 out from the mode. */
5405 if (type)
5407 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5409 if (ag_count > 0 && ag_count <= 4)
5410 *count = ag_count;
5411 else
5412 return false;
5414 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5415 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5416 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5418 *count = 1;
5419 new_mode = mode;
5421 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5423 *count = 2;
5424 new_mode = (mode == DCmode ? DFmode : SFmode);
5426 else
5427 return false;
5430 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5431 return false;
5433 *base_mode = new_mode;
5434 return true;
5437 static bool
5438 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5439 machine_mode mode, const_tree type)
5441 int count ATTRIBUTE_UNUSED;
5442 machine_mode ag_mode ATTRIBUTE_UNUSED;
5444 if (!use_vfp_abi (pcs_variant, false))
5445 return false;
5446 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5447 &ag_mode, &count);
5450 static bool
5451 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5452 const_tree type)
5454 if (!use_vfp_abi (pcum->pcs_variant, false))
5455 return false;
5457 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5458 &pcum->aapcs_vfp_rmode,
5459 &pcum->aapcs_vfp_rcount);
5462 static bool
5463 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5464 const_tree type ATTRIBUTE_UNUSED)
5466 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5467 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5468 int regno;
5470 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5471 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5473 pcum->aapcs_vfp_reg_alloc = mask << regno;
5474 if (mode == BLKmode
5475 || (mode == TImode && ! TARGET_NEON)
5476 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5478 int i;
5479 int rcount = pcum->aapcs_vfp_rcount;
5480 int rshift = shift;
5481 machine_mode rmode = pcum->aapcs_vfp_rmode;
5482 rtx par;
5483 if (!TARGET_NEON)
5485 /* Avoid using unsupported vector modes. */
5486 if (rmode == V2SImode)
5487 rmode = DImode;
5488 else if (rmode == V4SImode)
5490 rmode = DImode;
5491 rcount *= 2;
5492 rshift /= 2;
5495 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5496 for (i = 0; i < rcount; i++)
5498 rtx tmp = gen_rtx_REG (rmode,
5499 FIRST_VFP_REGNUM + regno + i * rshift);
5500 tmp = gen_rtx_EXPR_LIST
5501 (VOIDmode, tmp,
5502 GEN_INT (i * GET_MODE_SIZE (rmode)));
5503 XVECEXP (par, 0, i) = tmp;
5506 pcum->aapcs_reg = par;
5508 else
5509 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5510 return true;
5512 return false;
5515 static rtx
5516 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5517 machine_mode mode,
5518 const_tree type ATTRIBUTE_UNUSED)
5520 if (!use_vfp_abi (pcs_variant, false))
5521 return NULL;
5523 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5525 int count;
5526 machine_mode ag_mode;
5527 int i;
5528 rtx par;
5529 int shift;
5531 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5532 &ag_mode, &count);
5534 if (!TARGET_NEON)
5536 if (ag_mode == V2SImode)
5537 ag_mode = DImode;
5538 else if (ag_mode == V4SImode)
5540 ag_mode = DImode;
5541 count *= 2;
5544 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5545 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5546 for (i = 0; i < count; i++)
5548 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5549 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5550 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5551 XVECEXP (par, 0, i) = tmp;
5554 return par;
5557 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5560 static void
5561 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5562 machine_mode mode ATTRIBUTE_UNUSED,
5563 const_tree type ATTRIBUTE_UNUSED)
5565 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5566 pcum->aapcs_vfp_reg_alloc = 0;
5567 return;
5570 #define AAPCS_CP(X) \
5572 aapcs_ ## X ## _cum_init, \
5573 aapcs_ ## X ## _is_call_candidate, \
5574 aapcs_ ## X ## _allocate, \
5575 aapcs_ ## X ## _is_return_candidate, \
5576 aapcs_ ## X ## _allocate_return_reg, \
5577 aapcs_ ## X ## _advance \
5580 /* Table of co-processors that can be used to pass arguments in
5581 registers. Ideally no argument should be a candidate for more than
5582 one co-processor table entry, but the table is processed in order
5583 and stops after the first match. If that entry then fails to put
5584 the argument into a co-processor register, the argument will go on
5585 the stack. */
5586 static struct
5588 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5589 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5591 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5592 BLKmode) is a candidate for this co-processor's registers; this
5593 function should ignore any position-dependent state in
5594 CUMULATIVE_ARGS and only use call-type dependent information. */
5595 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5597 /* Return true if the argument does get a co-processor register; it
5598 should set aapcs_reg to an RTX of the register allocated as is
5599 required for a return from FUNCTION_ARG. */
5600 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5602 /* Return true if a result of mode MODE (or type TYPE if MODE is
5603 BLKmode) can be returned in this co-processor's registers. */
5604 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5606 /* Allocate and return an RTX element to hold the return type of a
5607 call. This routine must not fail and will only be called if
5608 is_return_candidate returned true with the same parameters. */
5609 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5611 /* Finish processing this argument and prepare to start processing
5612 the next one. */
5613 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5614 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5616 AAPCS_CP(vfp)
5619 #undef AAPCS_CP
5621 static int
5622 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5623 const_tree type)
5625 int i;
5627 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5628 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5629 return i;
5631 return -1;
5634 static int
5635 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5637 /* We aren't passed a decl, so we can't check that a call is local.
5638 However, it isn't clear that that would be a win anyway, since it
5639 might limit some tail-calling opportunities. */
5640 enum arm_pcs pcs_variant;
5642 if (fntype)
5644 const_tree fndecl = NULL_TREE;
5646 if (TREE_CODE (fntype) == FUNCTION_DECL)
5648 fndecl = fntype;
5649 fntype = TREE_TYPE (fntype);
5652 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5654 else
5655 pcs_variant = arm_pcs_default;
5657 if (pcs_variant != ARM_PCS_AAPCS)
5659 int i;
5661 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5662 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5663 TYPE_MODE (type),
5664 type))
5665 return i;
5667 return -1;
5670 static rtx
5671 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5672 const_tree fntype)
5674 /* We aren't passed a decl, so we can't check that a call is local.
5675 However, it isn't clear that that would be a win anyway, since it
5676 might limit some tail-calling opportunities. */
5677 enum arm_pcs pcs_variant;
5678 int unsignedp ATTRIBUTE_UNUSED;
5680 if (fntype)
5682 const_tree fndecl = NULL_TREE;
5684 if (TREE_CODE (fntype) == FUNCTION_DECL)
5686 fndecl = fntype;
5687 fntype = TREE_TYPE (fntype);
5690 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5692 else
5693 pcs_variant = arm_pcs_default;
5695 /* Promote integer types. */
5696 if (type && INTEGRAL_TYPE_P (type))
5697 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5699 if (pcs_variant != ARM_PCS_AAPCS)
5701 int i;
5703 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5704 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5705 type))
5706 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5707 mode, type);
5710 /* Promotes small structs returned in a register to full-word size
5711 for big-endian AAPCS. */
5712 if (type && arm_return_in_msb (type))
5714 HOST_WIDE_INT size = int_size_in_bytes (type);
5715 if (size % UNITS_PER_WORD != 0)
5717 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5718 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5722 return gen_rtx_REG (mode, R0_REGNUM);
5725 static rtx
5726 aapcs_libcall_value (machine_mode mode)
5728 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5729 && GET_MODE_SIZE (mode) <= 4)
5730 mode = SImode;
5732 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5735 /* Lay out a function argument using the AAPCS rules. The rule
5736 numbers referred to here are those in the AAPCS. */
5737 static void
5738 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5739 const_tree type, bool named)
5741 int nregs, nregs2;
5742 int ncrn;
5744 /* We only need to do this once per argument. */
5745 if (pcum->aapcs_arg_processed)
5746 return;
5748 pcum->aapcs_arg_processed = true;
5750 /* Special case: if named is false then we are handling an incoming
5751 anonymous argument which is on the stack. */
5752 if (!named)
5753 return;
5755 /* Is this a potential co-processor register candidate? */
5756 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5758 int slot = aapcs_select_call_coproc (pcum, mode, type);
5759 pcum->aapcs_cprc_slot = slot;
5761 /* We don't have to apply any of the rules from part B of the
5762 preparation phase, these are handled elsewhere in the
5763 compiler. */
5765 if (slot >= 0)
5767 /* A Co-processor register candidate goes either in its own
5768 class of registers or on the stack. */
5769 if (!pcum->aapcs_cprc_failed[slot])
5771 /* C1.cp - Try to allocate the argument to co-processor
5772 registers. */
5773 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5774 return;
5776 /* C2.cp - Put the argument on the stack and note that we
5777 can't assign any more candidates in this slot. We also
5778 need to note that we have allocated stack space, so that
5779 we won't later try to split a non-cprc candidate between
5780 core registers and the stack. */
5781 pcum->aapcs_cprc_failed[slot] = true;
5782 pcum->can_split = false;
5785 /* We didn't get a register, so this argument goes on the
5786 stack. */
5787 gcc_assert (pcum->can_split == false);
5788 return;
5792 /* C3 - For double-word aligned arguments, round the NCRN up to the
5793 next even number. */
5794 ncrn = pcum->aapcs_ncrn;
5795 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5796 ncrn++;
5798 nregs = ARM_NUM_REGS2(mode, type);
5800 /* Sigh, this test should really assert that nregs > 0, but a GCC
5801 extension allows empty structs and then gives them zero size; it
5802 then allows such a structure to be passed by value. For some of
5803 the code below we have to pretend that such an argument has
5804 non-zero size so that we 'locate' it correctly either in
5805 registers or on the stack. */
5806 gcc_assert (nregs >= 0);
5808 nregs2 = nregs ? nregs : 1;
5810 /* C4 - Argument fits entirely in core registers. */
5811 if (ncrn + nregs2 <= NUM_ARG_REGS)
5813 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5814 pcum->aapcs_next_ncrn = ncrn + nregs;
5815 return;
5818 /* C5 - Some core registers left and there are no arguments already
5819 on the stack: split this argument between the remaining core
5820 registers and the stack. */
5821 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5823 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5824 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5825 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5826 return;
5829 /* C6 - NCRN is set to 4. */
5830 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5832 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5833 return;
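/* Rough worked example of the rules above, assuming the base (integer)
   variant of the AAPCS where a double is passed in core registers:
   for f (int a, double b, int c)
     a: ncrn = 0, nregs = 1   -> r0 (C4), next ncrn = 1
     b: needs doubleword alignment, so C3 rounds ncrn up to 2;
        nregs = 2             -> r2/r3 (C4), next ncrn = 4
     c: ncrn = 4, no core registers left -> stack (C6, C7/C8).  */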
5836 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5837 for a call to a function whose data type is FNTYPE.
5838 For a library call, FNTYPE is NULL. */
5839 void
5840 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5841 rtx libname,
5842 tree fndecl ATTRIBUTE_UNUSED)
5844 /* Long call handling. */
5845 if (fntype)
5846 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5847 else
5848 pcum->pcs_variant = arm_pcs_default;
5850 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5852 if (arm_libcall_uses_aapcs_base (libname))
5853 pcum->pcs_variant = ARM_PCS_AAPCS;
5855 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5856 pcum->aapcs_reg = NULL_RTX;
5857 pcum->aapcs_partial = 0;
5858 pcum->aapcs_arg_processed = false;
5859 pcum->aapcs_cprc_slot = -1;
5860 pcum->can_split = true;
5862 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5864 int i;
5866 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5868 pcum->aapcs_cprc_failed[i] = false;
5869 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5872 return;
5875 /* Legacy ABIs */
5877 /* On the ARM, the offset starts at 0. */
5878 pcum->nregs = 0;
5879 pcum->iwmmxt_nregs = 0;
5880 pcum->can_split = true;
5882 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named'. */
5884 pcum->named_count = 0;
5885 pcum->nargs = 0;
5887 if (TARGET_REALLY_IWMMXT && fntype)
5889 tree fn_arg;
5891 for (fn_arg = TYPE_ARG_TYPES (fntype);
5892 fn_arg;
5893 fn_arg = TREE_CHAIN (fn_arg))
5894 pcum->named_count += 1;
5896 if (! pcum->named_count)
5897 pcum->named_count = INT_MAX;
5901 /* Return true if we use LRA instead of reload pass. */
5902 static bool
5903 arm_lra_p (void)
5905 return arm_lra_flag;
5908 /* Return true if mode/type need doubleword alignment. */
5909 static bool
5910 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5912 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5913 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
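/* For example, DImode (64-bit alignment) or a type containing a double
   exceeds PARM_BOUNDARY (32 on ARM) and therefore needs doubleword
   alignment, i.e. an even starting register or 8-byte stack alignment.  */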
5917 /* Determine where to put an argument to a function.
5918 Value is zero to push the argument on the stack,
5919 or a hard register in which to store the argument.
5921 MODE is the argument's machine mode.
5922 TYPE is the data type of the argument (as a tree).
5923 This is null for libcalls where that information may
5924 not be available.
5925 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5926 the preceding args and about the function being called.
5927 NAMED is nonzero if this argument is a named parameter
5928 (otherwise it is an extra parameter matching an ellipsis).
5930 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5931 other arguments are passed on the stack. If (NAMED == 0) (which happens
5932 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5933 defined), say it is passed on the stack (function_prologue will
5934 indeed make it be passed on the stack if necessary). */
5936 static rtx
5937 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5938 const_tree type, bool named)
5940 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5941 int nregs;
5943 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5944 a call insn (op3 of a call_value insn). */
5945 if (mode == VOIDmode)
5946 return const0_rtx;
5948 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5950 aapcs_layout_arg (pcum, mode, type, named);
5951 return pcum->aapcs_reg;
5954 /* Varargs vectors are treated the same as long long.
5955 named_count avoids having to change the way arm handles 'named'. */
5956 if (TARGET_IWMMXT_ABI
5957 && arm_vector_mode_supported_p (mode)
5958 && pcum->named_count > pcum->nargs + 1)
5960 if (pcum->iwmmxt_nregs <= 9)
5961 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5962 else
5964 pcum->can_split = false;
5965 return NULL_RTX;
5969 /* Put doubleword aligned quantities in even register pairs. */
5970 if (pcum->nregs & 1
5971 && ARM_DOUBLEWORD_ALIGN
5972 && arm_needs_doubleword_align (mode, type))
5973 pcum->nregs++;
5975 /* Only allow splitting an arg between regs and memory if all preceding
5976 args were allocated to regs. For args passed by reference we only count
5977 the reference pointer. */
5978 if (pcum->can_split)
5979 nregs = 1;
5980 else
5981 nregs = ARM_NUM_REGS2 (mode, type);
5983 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5984 return NULL_RTX;
5986 return gen_rtx_REG (mode, pcum->nregs);
5989 static unsigned int
5990 arm_function_arg_boundary (machine_mode mode, const_tree type)
5992 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5993 ? DOUBLEWORD_ALIGNMENT
5994 : PARM_BOUNDARY);
5997 static int
5998 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
5999 tree type, bool named)
6001 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6002 int nregs = pcum->nregs;
6004 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6006 aapcs_layout_arg (pcum, mode, type, named);
6007 return pcum->aapcs_partial;
6010 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6011 return 0;
6013 if (NUM_ARG_REGS > nregs
6014 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6015 && pcum->can_split)
6016 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6018 return 0;
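/* Rough example: an 8-byte argument that arrives when only r3 is left
   (nregs = 3) is split by the code above; 4 < 3 + 2 and can_split holds,
   so (4 - 3) * UNITS_PER_WORD = 4 bytes go in r3 and the remaining
   4 bytes go on the stack.  */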
6021 /* Update the data in PCUM to advance over an argument
6022 of mode MODE and data type TYPE.
6023 (TYPE is null for libcalls where that information may not be available.) */
6025 static void
6026 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6027 const_tree type, bool named)
6029 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6031 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6033 aapcs_layout_arg (pcum, mode, type, named);
6035 if (pcum->aapcs_cprc_slot >= 0)
6037 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6038 type);
6039 pcum->aapcs_cprc_slot = -1;
6042 /* Generic stuff. */
6043 pcum->aapcs_arg_processed = false;
6044 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6045 pcum->aapcs_reg = NULL_RTX;
6046 pcum->aapcs_partial = 0;
6048 else
6050 pcum->nargs += 1;
6051 if (arm_vector_mode_supported_p (mode)
6052 && pcum->named_count > pcum->nargs
6053 && TARGET_IWMMXT_ABI)
6054 pcum->iwmmxt_nregs += 1;
6055 else
6056 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6060 /* Variable sized types are passed by reference. This is a GCC
6061 extension to the ARM ABI. */
6063 static bool
6064 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6065 machine_mode mode ATTRIBUTE_UNUSED,
6066 const_tree type, bool named ATTRIBUTE_UNUSED)
6068 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6071 /* Encode the current state of the #pragma [no_]long_calls. */
6072 typedef enum
6074 OFF, /* No #pragma [no_]long_calls is in effect. */
6075 LONG, /* #pragma long_calls is in effect. */
6076 SHORT /* #pragma no_long_calls is in effect. */
6077 } arm_pragma_enum;
6079 static arm_pragma_enum arm_pragma_long_calls = OFF;
6081 void
6082 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6084 arm_pragma_long_calls = LONG;
6087 void
6088 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6090 arm_pragma_long_calls = SHORT;
6093 void
6094 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6096 arm_pragma_long_calls = OFF;
6099 /* Handle an attribute requiring a FUNCTION_DECL;
6100 arguments as in struct attribute_spec.handler. */
6101 static tree
6102 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6103 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6105 if (TREE_CODE (*node) != FUNCTION_DECL)
6107 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6108 name);
6109 *no_add_attrs = true;
6112 return NULL_TREE;
6115 /* Handle an "interrupt" or "isr" attribute;
6116 arguments as in struct attribute_spec.handler. */
6117 static tree
6118 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6119 bool *no_add_attrs)
6121 if (DECL_P (*node))
6123 if (TREE_CODE (*node) != FUNCTION_DECL)
6125 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6126 name);
6127 *no_add_attrs = true;
6129 /* FIXME: the argument, if any, is checked for type attributes;
6130 should it be checked for decl ones? */
6132 else
6134 if (TREE_CODE (*node) == FUNCTION_TYPE
6135 || TREE_CODE (*node) == METHOD_TYPE)
6137 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6139 warning (OPT_Wattributes, "%qE attribute ignored",
6140 name);
6141 *no_add_attrs = true;
6144 else if (TREE_CODE (*node) == POINTER_TYPE
6145 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6146 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6147 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6149 *node = build_variant_type_copy (*node);
6150 TREE_TYPE (*node) = build_type_attribute_variant
6151 (TREE_TYPE (*node),
6152 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6153 *no_add_attrs = true;
6155 else
6157 /* Possibly pass this attribute on from the type to a decl. */
6158 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6159 | (int) ATTR_FLAG_FUNCTION_NEXT
6160 | (int) ATTR_FLAG_ARRAY_NEXT))
6162 *no_add_attrs = true;
6163 return tree_cons (name, args, NULL_TREE);
6165 else
6167 warning (OPT_Wattributes, "%qE attribute ignored",
6168 name);
6173 return NULL_TREE;
6176 /* Handle a "pcs" attribute; arguments as in struct
6177 attribute_spec.handler. */
6178 static tree
6179 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6180 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6182 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6184 warning (OPT_Wattributes, "%qE attribute ignored", name);
6185 *no_add_attrs = true;
6187 return NULL_TREE;
6190 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6191 /* Handle the "notshared" attribute. This attribute is another way of
6192 requesting hidden visibility. ARM's compiler supports
6193 "__declspec(notshared)"; we support the same thing via an
6194 attribute. */
6196 static tree
6197 arm_handle_notshared_attribute (tree *node,
6198 tree name ATTRIBUTE_UNUSED,
6199 tree args ATTRIBUTE_UNUSED,
6200 int flags ATTRIBUTE_UNUSED,
6201 bool *no_add_attrs)
6203 tree decl = TYPE_NAME (*node);
6205 if (decl)
6207 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6208 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6209 *no_add_attrs = false;
6211 return NULL_TREE;
6213 #endif
6215 /* Return 0 if the attributes for two types are incompatible, 1 if they
6216 are compatible, and 2 if they are nearly compatible (which causes a
6217 warning to be generated). */
6218 static int
6219 arm_comp_type_attributes (const_tree type1, const_tree type2)
6221 int l1, l2, s1, s2;
6223 /* Check for mismatch of non-default calling convention. */
6224 if (TREE_CODE (type1) != FUNCTION_TYPE)
6225 return 1;
6227 /* Check for mismatched call attributes. */
6228 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6229 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6230 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6231 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6233 /* Only bother to check if an attribute is defined. */
6234 if (l1 | l2 | s1 | s2)
6236 /* If one type has an attribute, the other must have the same attribute. */
6237 if ((l1 != l2) || (s1 != s2))
6238 return 0;
6240 /* Disallow mixed attributes. */
6241 if ((l1 & s2) || (l2 & s1))
6242 return 0;
6245 /* Check for mismatched ISR attribute. */
6246 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6247 if (! l1)
6248 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6249 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6250 if (! l2)
6251 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6252 if (l1 != l2)
6253 return 0;
6255 return 1;
6258 /* Assigns default attributes to newly defined type. This is used to
6259 set short_call/long_call attributes for function types of
6260 functions defined inside corresponding #pragma scopes. */
6261 static void
6262 arm_set_default_type_attributes (tree type)
6264 /* Add __attribute__ ((long_call)) to all functions, when
6265 inside #pragma long_calls or __attribute__ ((short_call)),
6266 when inside #pragma no_long_calls. */
6267 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6269 tree type_attr_list, attr_name;
6270 type_attr_list = TYPE_ATTRIBUTES (type);
6272 if (arm_pragma_long_calls == LONG)
6273 attr_name = get_identifier ("long_call");
6274 else if (arm_pragma_long_calls == SHORT)
6275 attr_name = get_identifier ("short_call");
6276 else
6277 return;
6279 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6280 TYPE_ATTRIBUTES (type) = type_attr_list;
6284 /* Return true if DECL is known to be linked into section SECTION. */
6286 static bool
6287 arm_function_in_section_p (tree decl, section *section)
6289 /* We can only be certain about functions defined in the same
6290 compilation unit. */
6291 if (!TREE_STATIC (decl))
6292 return false;
6294 /* Make sure that SYMBOL always binds to the definition in this
6295 compilation unit. */
6296 if (!targetm.binds_local_p (decl))
6297 return false;
6299 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6300 if (!DECL_SECTION_NAME (decl))
6302 /* Make sure that we will not create a unique section for DECL. */
6303 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6304 return false;
6307 return function_section (decl) == section;
6310 /* Return nonzero if a 32-bit "long_call" should be generated for
6311 a call from the current function to DECL. We generate a long_call
6312 if the function:
6314 a. has an __attribute__ ((long_call))
6315 or b. is within the scope of a #pragma long_calls
6316 or c. the -mlong-calls command line switch has been specified
6318 However we do not generate a long call if the function:
6320 d. has an __attribute__ ((short_call))
6321 or e. is inside the scope of a #pragma no_long_calls
6322 or f. is defined in the same section as the current function. */
6324 bool
6325 arm_is_long_call_p (tree decl)
6327 tree attrs;
6329 if (!decl)
6330 return TARGET_LONG_CALLS;
6332 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6333 if (lookup_attribute ("short_call", attrs))
6334 return false;
6336 /* For "f", be conservative, and only cater for cases in which the
6337 whole of the current function is placed in the same section. */
6338 if (!flag_reorder_blocks_and_partition
6339 && TREE_CODE (decl) == FUNCTION_DECL
6340 && arm_function_in_section_p (decl, current_function_section ()))
6341 return false;
6343 if (lookup_attribute ("long_call", attrs))
6344 return true;
6346 return TARGET_LONG_CALLS;
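/* For reference, the attributes tested above are used like this in C:

     extern void far_away (void) __attribute__ ((long_call));
     extern void nearby (void)   __attribute__ ((short_call));

   Calls to far_away always use a full 32-bit address sequence, even
   without -mlong-calls, while calls to nearby always use a plain BL,
   even inside #pragma long_calls.  */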
6349 /* Return nonzero if it is ok to make a tail-call to DECL. */
6350 static bool
6351 arm_function_ok_for_sibcall (tree decl, tree exp)
6353 unsigned long func_type;
6355 if (cfun->machine->sibcall_blocked)
6356 return false;
6358 /* Never tailcall something if we are generating code for Thumb-1. */
6359 if (TARGET_THUMB1)
6360 return false;
6362 /* The PIC register is live on entry to VxWorks PLT entries, so we
6363 must make the call before restoring the PIC register. */
6364 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6365 return false;
6367 /* If we are interworking and the function is not declared static
6368 then we can't tail-call it unless we know that it exists in this
6369 compilation unit (since it might be a Thumb routine). */
6370 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6371 && !TREE_ASM_WRITTEN (decl))
6372 return false;
6374 func_type = arm_current_func_type ();
6375 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6376 if (IS_INTERRUPT (func_type))
6377 return false;
6379 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6381 /* Check that the return value locations are the same. For
6382 example that we aren't returning a value from the sibling in
6383 a VFP register but then need to transfer it to a core
6384 register. */
6385 rtx a, b;
6387 a = arm_function_value (TREE_TYPE (exp), decl, false);
6388 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6389 cfun->decl, false);
6390 if (!rtx_equal_p (a, b))
6391 return false;
6394 /* Never tailcall if function may be called with a misaligned SP. */
6395 if (IS_STACKALIGN (func_type))
6396 return false;
6398 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6399 references should become a NOP. Don't convert such calls into
6400 sibling calls. */
6401 if (TARGET_AAPCS_BASED
6402 && arm_abi == ARM_ABI_AAPCS
6403 && decl
6404 && DECL_WEAK (decl))
6405 return false;
6407 /* Everything else is ok. */
6408 return true;
6412 /* Addressing mode support functions. */
6414 /* Return nonzero if X is a legitimate immediate operand when compiling
6415 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6417 legitimate_pic_operand_p (rtx x)
6419 if (GET_CODE (x) == SYMBOL_REF
6420 || (GET_CODE (x) == CONST
6421 && GET_CODE (XEXP (x, 0)) == PLUS
6422 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6423 return 0;
6425 return 1;
6428 /* Record that the current function needs a PIC register. Initialize
6429 cfun->machine->pic_reg if we have not already done so. */
6431 static void
6432 require_pic_register (void)
6434 /* A lot of the logic here is made obscure by the fact that this
6435 routine gets called as part of the rtx cost estimation process.
6436 We don't want those calls to affect any assumptions about the real
6437 function; and further, we can't call entry_of_function() until we
6438 start the real expansion process. */
6439 if (!crtl->uses_pic_offset_table)
6441 gcc_assert (can_create_pseudo_p ());
6442 if (arm_pic_register != INVALID_REGNUM
6443 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6445 if (!cfun->machine->pic_reg)
6446 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6448 /* Play games to avoid marking the function as needing pic
6449 if we are being called as part of the cost-estimation
6450 process. */
6451 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6452 crtl->uses_pic_offset_table = 1;
6454 else
6456 rtx_insn *seq, *insn;
6458 if (!cfun->machine->pic_reg)
6459 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6461 /* Play games to avoid marking the function as needing pic
6462 if we are being called as part of the cost-estimation
6463 process. */
6464 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6466 crtl->uses_pic_offset_table = 1;
6467 start_sequence ();
6469 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6470 && arm_pic_register > LAST_LO_REGNUM)
6471 emit_move_insn (cfun->machine->pic_reg,
6472 gen_rtx_REG (Pmode, arm_pic_register));
6473 else
6474 arm_load_pic_register (0UL);
6476 seq = get_insns ();
6477 end_sequence ();
6479 for (insn = seq; insn; insn = NEXT_INSN (insn))
6480 if (INSN_P (insn))
6481 INSN_LOCATION (insn) = prologue_location;
6483 /* We can be called during expansion of PHI nodes, where
6484 we can't yet emit instructions directly in the final
6485 insn stream. Queue the insns on the entry edge, they will
6486 be committed after everything else is expanded. */
6487 insert_insn_on_edge (seq,
6488 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6495 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6497 if (GET_CODE (orig) == SYMBOL_REF
6498 || GET_CODE (orig) == LABEL_REF)
6500 rtx insn;
6502 if (reg == 0)
6504 gcc_assert (can_create_pseudo_p ());
6505 reg = gen_reg_rtx (Pmode);
6508 /* VxWorks does not impose a fixed gap between segments; the run-time
6509 gap can be different from the object-file gap. We therefore can't
6510 use GOTOFF unless we are absolutely sure that the symbol is in the
6511 same segment as the GOT. Unfortunately, the flexibility of linker
6512 scripts means that we can't be sure of that in general, so assume
6513 that GOTOFF is never valid on VxWorks. */
6514 if ((GET_CODE (orig) == LABEL_REF
6515 || (GET_CODE (orig) == SYMBOL_REF &&
6516 SYMBOL_REF_LOCAL_P (orig)))
6517 && NEED_GOT_RELOC
6518 && arm_pic_data_is_text_relative)
6519 insn = arm_pic_static_addr (orig, reg);
6520 else
6522 rtx pat;
6523 rtx mem;
6525 /* If this function doesn't have a pic register, create one now. */
6526 require_pic_register ();
6528 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6530 /* Make the MEM as close to a constant as possible. */
6531 mem = SET_SRC (pat);
6532 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6533 MEM_READONLY_P (mem) = 1;
6534 MEM_NOTRAP_P (mem) = 1;
6536 insn = emit_insn (pat);
6539 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6540 by loop. */
6541 set_unique_reg_note (insn, REG_EQUAL, orig);
6543 return reg;
6545 else if (GET_CODE (orig) == CONST)
6547 rtx base, offset;
6549 if (GET_CODE (XEXP (orig, 0)) == PLUS
6550 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6551 return orig;
6553 /* Handle the case where we have: const (UNSPEC_TLS). */
6554 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6555 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6556 return orig;
6558 /* Handle the case where we have:
6559 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6560 CONST_INT. */
6561 if (GET_CODE (XEXP (orig, 0)) == PLUS
6562 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6563 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6565 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6566 return orig;
6569 if (reg == 0)
6571 gcc_assert (can_create_pseudo_p ());
6572 reg = gen_reg_rtx (Pmode);
6575 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6577 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6578 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6579 base == reg ? 0 : reg);
6581 if (CONST_INT_P (offset))
6583 /* The base register doesn't really matter, we only want to
6584 test the index for the appropriate mode. */
6585 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6587 gcc_assert (can_create_pseudo_p ());
6588 offset = force_reg (Pmode, offset);
6591 if (CONST_INT_P (offset))
6592 return plus_constant (Pmode, base, INTVAL (offset));
6595 if (GET_MODE_SIZE (mode) > 4
6596 && (GET_MODE_CLASS (mode) == MODE_INT
6597 || TARGET_SOFT_FLOAT))
6599 emit_insn (gen_addsi3 (reg, base, offset));
6600 return reg;
6603 return gen_rtx_PLUS (Pmode, base, offset);
6606 return orig;
6610 /* Find a spare register to use during the prolog of a function. */
6612 static int
6613 thumb_find_work_register (unsigned long pushed_regs_mask)
6615 int reg;
6617 /* Check the argument registers first as these are call-used. The
6618 register allocation order means that sometimes r3 might be used
6619 but earlier argument registers might not, so check them all. */
6620 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6621 if (!df_regs_ever_live_p (reg))
6622 return reg;
6624 /* Before going on to check the call-saved registers we can try a couple
6625 more ways of deducing that r3 is available. The first is when we are
6626 pushing anonymous arguments onto the stack and we have less than 4
6627 registers worth of fixed arguments(*). In this case r3 will be part of
6628 the variable argument list and so we can be sure that it will be
6629 pushed right at the start of the function. Hence it will be available
6630 for the rest of the prologue.
6631 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6632 if (cfun->machine->uses_anonymous_args
6633 && crtl->args.pretend_args_size > 0)
6634 return LAST_ARG_REGNUM;
6636 /* The other case is when we have fixed arguments but less than 4 registers
6637 worth. In this case r3 might be used in the body of the function, but
6638 it is not being used to convey an argument into the function. In theory
6639 we could just check crtl->args.size to see how many bytes are
6640 being passed in argument registers, but it seems that it is unreliable.
6641 Sometimes it will have the value 0 when in fact arguments are being
6642 passed. (See testcase execute/20021111-1.c for an example). So we also
6643 check the args_info.nregs field as well. The problem with this field is
6644 that it makes no allowances for arguments that are passed to the
6645 function but which are not used. Hence we could miss an opportunity
6646 when a function has an unused argument in r3. But it is better to be
6647 safe than to be sorry. */
6648 if (! cfun->machine->uses_anonymous_args
6649 && crtl->args.size >= 0
6650 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6651 && (TARGET_AAPCS_BASED
6652 ? crtl->args.info.aapcs_ncrn < 4
6653 : crtl->args.info.nregs < 4))
6654 return LAST_ARG_REGNUM;
6656 /* Otherwise look for a call-saved register that is going to be pushed. */
6657 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6658 if (pushed_regs_mask & (1 << reg))
6659 return reg;
6661 if (TARGET_THUMB2)
6663 /* Thumb-2 can use high regs. */
6664 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6665 if (pushed_regs_mask & (1 << reg))
6666 return reg;
6668 /* Something went wrong - thumb_compute_save_reg_mask()
6669 should have arranged for a suitable register to be pushed. */
6670 gcc_unreachable ();
6673 static GTY(()) int pic_labelno;
6675 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6676 low register. */
6678 void
6679 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6681 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6683 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6684 return;
6686 gcc_assert (flag_pic);
6688 pic_reg = cfun->machine->pic_reg;
6689 if (TARGET_VXWORKS_RTP)
6691 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6692 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6693 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6695 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6697 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6698 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6700 else
6702 /* We use an UNSPEC rather than a LABEL_REF because this label
6703 never appears in the code stream. */
6705 labelno = GEN_INT (pic_labelno++);
6706 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6707 l1 = gen_rtx_CONST (VOIDmode, l1);
6709 /* On the ARM the PC register contains 'dot + 8' at the time of the
6710 addition, on the Thumb it is 'dot + 4'. */
6711 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6712 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6713 UNSPEC_GOTSYM_OFF);
6714 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6716 if (TARGET_32BIT)
6718 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6720 else /* TARGET_THUMB1 */
6722 if (arm_pic_register != INVALID_REGNUM
6723 && REGNO (pic_reg) > LAST_LO_REGNUM)
6725 /* We will have pushed the pic register, so we should always be
6726 able to find a work register. */
6727 pic_tmp = gen_rtx_REG (SImode,
6728 thumb_find_work_register (saved_regs));
6729 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6730 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6731 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6733 else if (arm_pic_register != INVALID_REGNUM
6734 && arm_pic_register > LAST_LO_REGNUM
6735 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6737 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6738 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6739 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6741 else
6742 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6746 /* Need to emit this whether or not we obey regdecls,
6747 since setjmp/longjmp can cause life info to screw up. */
6748 emit_use (pic_reg);
6751 /* Generate code to load the address of a static var when flag_pic is set. */
6752 static rtx
6753 arm_pic_static_addr (rtx orig, rtx reg)
6755 rtx l1, labelno, offset_rtx, insn;
6757 gcc_assert (flag_pic);
6759 /* We use an UNSPEC rather than a LABEL_REF because this label
6760 never appears in the code stream. */
6761 labelno = GEN_INT (pic_labelno++);
6762 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6763 l1 = gen_rtx_CONST (VOIDmode, l1);
6765 /* On the ARM the PC register contains 'dot + 8' at the time of the
6766 addition, on the Thumb it is 'dot + 4'. */
6767 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6768 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6769 UNSPEC_SYMBOL_OFFSET);
6770 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6772 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6773 return insn;
6776 /* Return nonzero if X is valid as an ARM state addressing register. */
6777 static int
6778 arm_address_register_rtx_p (rtx x, int strict_p)
6780 int regno;
6782 if (!REG_P (x))
6783 return 0;
6785 regno = REGNO (x);
6787 if (strict_p)
6788 return ARM_REGNO_OK_FOR_BASE_P (regno);
6790 return (regno <= LAST_ARM_REGNUM
6791 || regno >= FIRST_PSEUDO_REGISTER
6792 || regno == FRAME_POINTER_REGNUM
6793 || regno == ARG_POINTER_REGNUM);
6796 /* Return TRUE if this rtx is the difference of a symbol and a label,
6797 and will reduce to a PC-relative relocation in the object file.
6798 Expressions like this can be left alone when generating PIC, rather
6799 than forced through the GOT. */
6800 static int
6801 pcrel_constant_p (rtx x)
6803 if (GET_CODE (x) == MINUS)
6804 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6806 return FALSE;
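/* For example, (minus (symbol_ref "x") (label_ref L)) passes this test:
   the symbol-minus-label difference reduces to a PC-relative value, so
   it does not need to go through the GOT when generating PIC.  */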
6809 /* Return true if X will surely end up in an index register after next
6810 splitting pass. */
6811 static bool
6812 will_be_in_index_register (const_rtx x)
6814 /* arm.md: calculate_pic_address will split this into a register. */
6815 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6818 /* Return nonzero if X is a valid ARM state address operand. */
6820 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6821 int strict_p)
6823 bool use_ldrd;
6824 enum rtx_code code = GET_CODE (x);
6826 if (arm_address_register_rtx_p (x, strict_p))
6827 return 1;
6829 use_ldrd = (TARGET_LDRD
6830 && (mode == DImode
6831 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6833 if (code == POST_INC || code == PRE_DEC
6834 || ((code == PRE_INC || code == POST_DEC)
6835 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6836 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6838 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6839 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6840 && GET_CODE (XEXP (x, 1)) == PLUS
6841 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6843 rtx addend = XEXP (XEXP (x, 1), 1);
6845 /* Don't allow ldrd post increment by register because it's hard
6846 to fixup invalid register choices. */
6847 if (use_ldrd
6848 && GET_CODE (x) == POST_MODIFY
6849 && REG_P (addend))
6850 return 0;
6852 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6853 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6856 /* After reload constants split into minipools will have addresses
6857 from a LABEL_REF. */
6858 else if (reload_completed
6859 && (code == LABEL_REF
6860 || (code == CONST
6861 && GET_CODE (XEXP (x, 0)) == PLUS
6862 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6863 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6864 return 1;
6866 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6867 return 0;
6869 else if (code == PLUS)
6871 rtx xop0 = XEXP (x, 0);
6872 rtx xop1 = XEXP (x, 1);
6874 return ((arm_address_register_rtx_p (xop0, strict_p)
6875 && ((CONST_INT_P (xop1)
6876 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6877 || (!strict_p && will_be_in_index_register (xop1))))
6878 || (arm_address_register_rtx_p (xop1, strict_p)
6879 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6882 #if 0
6883 /* Reload currently can't handle MINUS, so disable this for now */
6884 else if (GET_CODE (x) == MINUS)
6886 rtx xop0 = XEXP (x, 0);
6887 rtx xop1 = XEXP (x, 1);
6889 return (arm_address_register_rtx_p (xop0, strict_p)
6890 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6892 #endif
6894 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6895 && code == SYMBOL_REF
6896 && CONSTANT_POOL_ADDRESS_P (x)
6897 && ! (flag_pic
6898 && symbol_mentioned_p (get_pool_constant (x))
6899 && ! pcrel_constant_p (get_pool_constant (x))))
6900 return 1;
6902 return 0;
6905 /* Return nonzero if X is a valid Thumb-2 address operand. */
6906 static int
6907 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6909 bool use_ldrd;
6910 enum rtx_code code = GET_CODE (x);
6912 if (arm_address_register_rtx_p (x, strict_p))
6913 return 1;
6915 use_ldrd = (TARGET_LDRD
6916 && (mode == DImode
6917 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6919 if (code == POST_INC || code == PRE_DEC
6920 || ((code == PRE_INC || code == POST_DEC)
6921 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6922 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6924 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6925 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6926 && GET_CODE (XEXP (x, 1)) == PLUS
6927 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6929 /* Thumb-2 only has autoincrement by constant. */
6930 rtx addend = XEXP (XEXP (x, 1), 1);
6931 HOST_WIDE_INT offset;
6933 if (!CONST_INT_P (addend))
6934 return 0;
6936 offset = INTVAL(addend);
6937 if (GET_MODE_SIZE (mode) <= 4)
6938 return (offset > -256 && offset < 256);
6940 return (use_ldrd && offset > -1024 && offset < 1024
6941 && (offset & 3) == 0);
6944 /* After reload constants split into minipools will have addresses
6945 from a LABEL_REF. */
6946 else if (reload_completed
6947 && (code == LABEL_REF
6948 || (code == CONST
6949 && GET_CODE (XEXP (x, 0)) == PLUS
6950 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6951 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6952 return 1;
6954 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6955 return 0;
6957 else if (code == PLUS)
6959 rtx xop0 = XEXP (x, 0);
6960 rtx xop1 = XEXP (x, 1);
6962 return ((arm_address_register_rtx_p (xop0, strict_p)
6963 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6964 || (!strict_p && will_be_in_index_register (xop1))))
6965 || (arm_address_register_rtx_p (xop1, strict_p)
6966 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6969 /* Normally we can assign constant values to target registers without
6970 the help of the constant pool. But there are cases where we have to
6971 use the constant pool, such as:
6972 1) assigning a label to a register;
6973 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6975 A constant pool access of the form:
6976 (set (reg r0) (mem (symbol_ref (".LC0"))))
6977 will cause the use of the literal pool (later, in arm_reorg).
6978 So here we mark such a form as invalid; the compiler will then
6979 adjust it into:
6980 (set (reg r0) (symbol_ref (".LC0")))
6981 (set (reg r0) (mem (reg r0))).
6982 No extra register is required, and (mem (reg r0)) won't cause the use
6983 of literal pools. */
6984 else if (arm_disable_literal_pool && code == SYMBOL_REF
6985 && CONSTANT_POOL_ADDRESS_P (x))
6986 return 0;
6988 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6989 && code == SYMBOL_REF
6990 && CONSTANT_POOL_ADDRESS_P (x)
6991 && ! (flag_pic
6992 && symbol_mentioned_p (get_pool_constant (x))
6993 && ! pcrel_constant_p (get_pool_constant (x))))
6994 return 1;
6996 return 0;
6999 /* Return nonzero if INDEX is valid for an address index operand in
7000 ARM state. */
7001 static int
7002 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7003 int strict_p)
7005 HOST_WIDE_INT range;
7006 enum rtx_code code = GET_CODE (index);
7008 /* Standard coprocessor addressing modes. */
7009 if (TARGET_HARD_FLOAT
7010 && TARGET_VFP
7011 && (mode == SFmode || mode == DFmode))
7012 return (code == CONST_INT && INTVAL (index) < 1024
7013 && INTVAL (index) > -1024
7014 && (INTVAL (index) & 3) == 0);
7016 /* For quad modes, we restrict the constant offset to be slightly less
7017 than what the instruction format permits. We do this because for
7018 quad mode moves, we will actually decompose them into two separate
7019 double-mode reads or writes. INDEX must therefore be a valid
7020 (double-mode) offset and so should INDEX+8. */
7021 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7022 return (code == CONST_INT
7023 && INTVAL (index) < 1016
7024 && INTVAL (index) > -1024
7025 && (INTVAL (index) & 3) == 0);
7027 /* We have no such constraint on double mode offsets, so we permit the
7028 full range of the instruction format. */
7029 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7030 return (code == CONST_INT
7031 && INTVAL (index) < 1024
7032 && INTVAL (index) > -1024
7033 && (INTVAL (index) & 3) == 0);
7035 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7036 return (code == CONST_INT
7037 && INTVAL (index) < 1024
7038 && INTVAL (index) > -1024
7039 && (INTVAL (index) & 3) == 0);
7041 if (arm_address_register_rtx_p (index, strict_p)
7042 && (GET_MODE_SIZE (mode) <= 4))
7043 return 1;
7045 if (mode == DImode || mode == DFmode)
7047 if (code == CONST_INT)
7049 HOST_WIDE_INT val = INTVAL (index);
7051 if (TARGET_LDRD)
7052 return val > -256 && val < 256;
7053 else
7054 return val > -4096 && val < 4092;
7057 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7060 if (GET_MODE_SIZE (mode) <= 4
7061 && ! (arm_arch4
7062 && (mode == HImode
7063 || mode == HFmode
7064 || (mode == QImode && outer == SIGN_EXTEND))))
7066 if (code == MULT)
7068 rtx xiop0 = XEXP (index, 0);
7069 rtx xiop1 = XEXP (index, 1);
7071 return ((arm_address_register_rtx_p (xiop0, strict_p)
7072 && power_of_two_operand (xiop1, SImode))
7073 || (arm_address_register_rtx_p (xiop1, strict_p)
7074 && power_of_two_operand (xiop0, SImode)));
7076 else if (code == LSHIFTRT || code == ASHIFTRT
7077 || code == ASHIFT || code == ROTATERT)
7079 rtx op = XEXP (index, 1);
7081 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7082 && CONST_INT_P (op)
7083 && INTVAL (op) > 0
7084 && INTVAL (op) <= 31);
7088 /* For ARM v4 we may be doing a sign-extend operation during the
7089 load. */
7090 if (arm_arch4)
7092 if (mode == HImode
7093 || mode == HFmode
7094 || (outer == SIGN_EXTEND && mode == QImode))
7095 range = 256;
7096 else
7097 range = 4096;
7099 else
7100 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7102 return (code == CONST_INT
7103 && INTVAL (index) < range
7104 && INTVAL (index) > -range);
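/* Rough summary of the forms accepted above for a plain SImode access
   in ARM state:
     - a base register alone (register index);
     - (mult reg 2^n), i.e. [rbase, rindex, LSL #n];
     - a register shifted or rotated by a constant 1..31;
     - a constant offset strictly between -4096 and 4096.
   DImode/DFmode with LDRD is limited to constants in (-256, 256), and
   VFP SF/DF accesses to word-aligned constants in (-1024, 1024).  */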
7107 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7108 index operand. i.e. 1, 2, 4 or 8. */
7109 static bool
7110 thumb2_index_mul_operand (rtx op)
7112 HOST_WIDE_INT val;
7114 if (!CONST_INT_P (op))
7115 return false;
7117 val = INTVAL(op);
7118 return (val == 1 || val == 2 || val == 4 || val == 8);
7121 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7122 static int
7123 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7125 enum rtx_code code = GET_CODE (index);
7127 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7128 /* Standard coprocessor addressing modes. */
7129 if (TARGET_HARD_FLOAT
7130 && TARGET_VFP
7131 && (mode == SFmode || mode == DFmode))
7132 return (code == CONST_INT && INTVAL (index) < 1024
7133 /* Thumb-2 only allows an index range greater than -256 for its core
7134 register loads/stores. Since we allow SF/DF in core registers, we have
7135 to use the intersection between -256~4096 (core) and -1024~1024
7136 (coprocessor). */
7137 && INTVAL (index) > -256
7138 && (INTVAL (index) & 3) == 0);
7140 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7142 /* For DImode assume values will usually live in core regs
7143 and only allow LDRD addressing modes. */
7144 if (!TARGET_LDRD || mode != DImode)
7145 return (code == CONST_INT
7146 && INTVAL (index) < 1024
7147 && INTVAL (index) > -1024
7148 && (INTVAL (index) & 3) == 0);
7151 /* For quad modes, we restrict the constant offset to be slightly less
7152 than what the instruction format permits. We do this because for
7153 quad mode moves, we will actually decompose them into two separate
7154 double-mode reads or writes. INDEX must therefore be a valid
7155 (double-mode) offset and so should INDEX+8. */
7156 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7157 return (code == CONST_INT
7158 && INTVAL (index) < 1016
7159 && INTVAL (index) > -1024
7160 && (INTVAL (index) & 3) == 0);
7162 /* We have no such constraint on double mode offsets, so we permit the
7163 full range of the instruction format. */
7164 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7165 return (code == CONST_INT
7166 && INTVAL (index) < 1024
7167 && INTVAL (index) > -1024
7168 && (INTVAL (index) & 3) == 0);
7170 if (arm_address_register_rtx_p (index, strict_p)
7171 && (GET_MODE_SIZE (mode) <= 4))
7172 return 1;
7174 if (mode == DImode || mode == DFmode)
7176 if (code == CONST_INT)
7178 HOST_WIDE_INT val = INTVAL (index);
7179 /* ??? Can we assume ldrd for thumb2? */
7180 /* Thumb-2 ldrd only has reg+const addressing modes. */
7181 /* ldrd supports offsets of +-1020.
7182 However the ldr fallback does not. */
7183 return val > -256 && val < 256 && (val & 3) == 0;
7185 else
7186 return 0;
7189 if (code == MULT)
7191 rtx xiop0 = XEXP (index, 0);
7192 rtx xiop1 = XEXP (index, 1);
7194 return ((arm_address_register_rtx_p (xiop0, strict_p)
7195 && thumb2_index_mul_operand (xiop1))
7196 || (arm_address_register_rtx_p (xiop1, strict_p)
7197 && thumb2_index_mul_operand (xiop0)));
7199 else if (code == ASHIFT)
7201 rtx op = XEXP (index, 1);
7203 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7204 && CONST_INT_P (op)
7205 && INTVAL (op) > 0
7206 && INTVAL (op) <= 3);
7209 return (code == CONST_INT
7210 && INTVAL (index) < 4096
7211 && INTVAL (index) > -256);
7214 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7215 static int
7216 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7218 int regno;
7220 if (!REG_P (x))
7221 return 0;
7223 regno = REGNO (x);
7225 if (strict_p)
7226 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7228 return (regno <= LAST_LO_REGNUM
7229 || regno > LAST_VIRTUAL_REGISTER
7230 || regno == FRAME_POINTER_REGNUM
7231 || (GET_MODE_SIZE (mode) >= 4
7232 && (regno == STACK_POINTER_REGNUM
7233 || regno >= FIRST_PSEUDO_REGISTER
7234 || x == hard_frame_pointer_rtx
7235 || x == arg_pointer_rtx)));
7238 /* Return nonzero if x is a legitimate index register. This is the case
7239 for any base register that can access a QImode object. */
7240 inline static int
7241 thumb1_index_register_rtx_p (rtx x, int strict_p)
7243 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7246 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7248 The AP may be eliminated to either the SP or the FP, so we use the
7249 least common denominator, e.g. SImode, and offsets from 0 to 64.
7251 ??? Verify whether the above is the right approach.
7253 ??? Also, the FP may be eliminated to the SP, so perhaps that
7254 needs special handling also.
7256 ??? Look at how the mips16 port solves this problem. It probably uses
7257 better ways to solve some of these problems.
7259 Although it is not incorrect, we don't accept QImode and HImode
7260 addresses based on the frame pointer or arg pointer until the
7261 reload pass starts. This is so that eliminating such addresses
7262 into stack based ones won't produce impossible code. */
7264 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7266 /* ??? Not clear if this is right. Experiment. */
7267 if (GET_MODE_SIZE (mode) < 4
7268 && !(reload_in_progress || reload_completed)
7269 && (reg_mentioned_p (frame_pointer_rtx, x)
7270 || reg_mentioned_p (arg_pointer_rtx, x)
7271 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7272 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7273 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7274 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7275 return 0;
7277 /* Accept any base register. SP only in SImode or larger. */
7278 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7279 return 1;
7281 /* This is PC relative data before arm_reorg runs. */
7282 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7283 && GET_CODE (x) == SYMBOL_REF
7284 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7285 return 1;
7287 /* This is PC relative data after arm_reorg runs. */
7288 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7289 && reload_completed
7290 && (GET_CODE (x) == LABEL_REF
7291 || (GET_CODE (x) == CONST
7292 && GET_CODE (XEXP (x, 0)) == PLUS
7293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7294 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7295 return 1;
7297 /* Post-inc indexing only supported for SImode and larger. */
7298 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7299 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7300 return 1;
7302 else if (GET_CODE (x) == PLUS)
7304 /* REG+REG address can be any two index registers. */
7305 /* We disallow FRAME+REG addressing since we know that FRAME
7306 will be replaced with STACK, and SP relative addressing only
7307 permits SP+OFFSET. */
7308 if (GET_MODE_SIZE (mode) <= 4
7309 && XEXP (x, 0) != frame_pointer_rtx
7310 && XEXP (x, 1) != frame_pointer_rtx
7311 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7312 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7313 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7314 return 1;
7316 /* REG+const has 5-7 bit offset for non-SP registers. */
7317 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7318 || XEXP (x, 0) == arg_pointer_rtx)
7319 && CONST_INT_P (XEXP (x, 1))
7320 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7321 return 1;
7323 /* REG+const has 10-bit offset for SP, but only SImode and
7324 larger is supported. */
7325 /* ??? Should probably check for DI/DFmode overflow here
7326 just like GO_IF_LEGITIMATE_OFFSET does. */
7327 else if (REG_P (XEXP (x, 0))
7328 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7329 && GET_MODE_SIZE (mode) >= 4
7330 && CONST_INT_P (XEXP (x, 1))
7331 && INTVAL (XEXP (x, 1)) >= 0
7332 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7333 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7334 return 1;
7336 else if (REG_P (XEXP (x, 0))
7337 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7338 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7339 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7340 && REGNO (XEXP (x, 0))
7341 <= LAST_VIRTUAL_POINTER_REGISTER))
7342 && GET_MODE_SIZE (mode) >= 4
7343 && CONST_INT_P (XEXP (x, 1))
7344 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7345 return 1;
7348 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7349 && GET_MODE_SIZE (mode) == 4
7350 && GET_CODE (x) == SYMBOL_REF
7351 && CONSTANT_POOL_ADDRESS_P (x)
7352 && ! (flag_pic
7353 && symbol_mentioned_p (get_pool_constant (x))
7354 && ! pcrel_constant_p (get_pool_constant (x))))
7355 return 1;
7357 return 0;
7360 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7361 instruction of mode MODE. */
7363 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7365 switch (GET_MODE_SIZE (mode))
7367 case 1:
7368 return val >= 0 && val < 32;
7370 case 2:
7371 return val >= 0 && val < 64 && (val & 1) == 0;
7373 default:
7374 return (val >= 0
7375 && (val + GET_MODE_SIZE (mode)) <= 128
7376 && (val & 3) == 0);
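/* Concretely, the ranges above allow:
     QImode: offsets 0..31
     HImode: even offsets 0..62
     SImode: word-aligned offsets 0..124 (larger modes a little less)
   matching the 5-bit immediate of the 16-bit Thumb load/store forms,
   scaled by the access size.  */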
7380 bool
7381 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7383 if (TARGET_ARM)
7384 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7385 else if (TARGET_THUMB2)
7386 return thumb2_legitimate_address_p (mode, x, strict_p);
7387 else /* if (TARGET_THUMB1) */
7388 return thumb1_legitimate_address_p (mode, x, strict_p);
7391 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7393 Given an rtx X being reloaded into a reg required to be
7394 in class CLASS, return the class of reg to actually use.
7395 In general this is just CLASS, but for the Thumb core registers and
7396 immediate constants we prefer a LO_REGS class or a subset. */
7398 static reg_class_t
7399 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7401 if (TARGET_32BIT)
7402 return rclass;
7403 else
7405 if (rclass == GENERAL_REGS)
7406 return LO_REGS;
7407 else
7408 return rclass;
7412 /* Build the SYMBOL_REF for __tls_get_addr. */
7414 static GTY(()) rtx tls_get_addr_libfunc;
7416 static rtx
7417 get_tls_get_addr (void)
7419 if (!tls_get_addr_libfunc)
7420 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7421 return tls_get_addr_libfunc;
7425 arm_load_tp (rtx target)
7427 if (!target)
7428 target = gen_reg_rtx (SImode);
7430 if (TARGET_HARD_TP)
7432 /* Can return in any reg. */
7433 emit_insn (gen_load_tp_hard (target));
7435 else
7437 /* Always returned in r0. Immediately copy the result into a pseudo,
7438 otherwise other uses of r0 (e.g. setting up function arguments) may
7439 clobber the value. */
7441 rtx tmp;
7443 emit_insn (gen_load_tp_soft ());
7445 tmp = gen_rtx_REG (SImode, 0);
7446 emit_move_insn (target, tmp);
7448 return target;
7451 static rtx
7452 load_tls_operand (rtx x, rtx reg)
7454 rtx tmp;
7456 if (reg == NULL_RTX)
7457 reg = gen_reg_rtx (SImode);
7459 tmp = gen_rtx_CONST (SImode, x);
7461 emit_move_insn (reg, tmp);
7463 return reg;
7466 static rtx
7467 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7469 rtx insns, label, labelno, sum;
7471 gcc_assert (reloc != TLS_DESCSEQ);
7472 start_sequence ();
7474 labelno = GEN_INT (pic_labelno++);
7475 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7476 label = gen_rtx_CONST (VOIDmode, label);
7478 sum = gen_rtx_UNSPEC (Pmode,
7479 gen_rtvec (4, x, GEN_INT (reloc), label,
7480 GEN_INT (TARGET_ARM ? 8 : 4)),
7481 UNSPEC_TLS);
7482 reg = load_tls_operand (sum, reg);
7484 if (TARGET_ARM)
7485 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7486 else
7487 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7489 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7490 LCT_PURE, /* LCT_CONST? */
7491 Pmode, 1, reg, Pmode);
7493 insns = get_insns ();
7494 end_sequence ();
7496 return insns;
7499 static rtx
7500 arm_tls_descseq_addr (rtx x, rtx reg)
7502 rtx labelno = GEN_INT (pic_labelno++);
7503 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7504 rtx sum = gen_rtx_UNSPEC (Pmode,
7505 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7506 gen_rtx_CONST (VOIDmode, label),
7507 GEN_INT (!TARGET_ARM)),
7508 UNSPEC_TLS);
7509 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7511 emit_insn (gen_tlscall (x, labelno));
7512 if (!reg)
7513 reg = gen_reg_rtx (SImode);
7514 else
7515 gcc_assert (REGNO (reg) != 0);
7517 emit_move_insn (reg, reg0);
7519 return reg;
7523 legitimize_tls_address (rtx x, rtx reg)
7525 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7526 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7528 switch (model)
7530 case TLS_MODEL_GLOBAL_DYNAMIC:
7531 if (TARGET_GNU2_TLS)
7533 reg = arm_tls_descseq_addr (x, reg);
7535 tp = arm_load_tp (NULL_RTX);
7537 dest = gen_rtx_PLUS (Pmode, tp, reg);
7539 else
7541 /* Original scheme */
7542 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7543 dest = gen_reg_rtx (Pmode);
7544 emit_libcall_block (insns, dest, ret, x);
7546 return dest;
7548 case TLS_MODEL_LOCAL_DYNAMIC:
7549 if (TARGET_GNU2_TLS)
7551 reg = arm_tls_descseq_addr (x, reg);
7553 tp = arm_load_tp (NULL_RTX);
7555 dest = gen_rtx_PLUS (Pmode, tp, reg);
7557 else
7559 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7561 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7562 share the LDM result with other LD model accesses. */
7563 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7564 UNSPEC_TLS);
7565 dest = gen_reg_rtx (Pmode);
7566 emit_libcall_block (insns, dest, ret, eqv);
7568 /* Load the addend. */
7569 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7570 GEN_INT (TLS_LDO32)),
7571 UNSPEC_TLS);
7572 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7573 dest = gen_rtx_PLUS (Pmode, dest, addend);
7575 return dest;
7577 case TLS_MODEL_INITIAL_EXEC:
7578 labelno = GEN_INT (pic_labelno++);
7579 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7580 label = gen_rtx_CONST (VOIDmode, label);
7581 sum = gen_rtx_UNSPEC (Pmode,
7582 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7583 GEN_INT (TARGET_ARM ? 8 : 4)),
7584 UNSPEC_TLS);
7585 reg = load_tls_operand (sum, reg);
7587 if (TARGET_ARM)
7588 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7589 else if (TARGET_THUMB2)
7590 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7591 else
7593 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7594 emit_move_insn (reg, gen_const_mem (SImode, reg));
7597 tp = arm_load_tp (NULL_RTX);
7599 return gen_rtx_PLUS (Pmode, tp, reg);
7601 case TLS_MODEL_LOCAL_EXEC:
7602 tp = arm_load_tp (NULL_RTX);
7604 reg = gen_rtx_UNSPEC (Pmode,
7605 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7606 UNSPEC_TLS);
7607 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7609 return gen_rtx_PLUS (Pmode, tp, reg);
7611 default:
7612 abort ();
7616 /* Try machine-dependent ways of modifying an illegitimate address
7617 to be legitimate. If we find one, return the new, valid address. */
7619 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7621 if (arm_tls_referenced_p (x))
7623 rtx addend = NULL;
7625 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7627 addend = XEXP (XEXP (x, 0), 1);
7628 x = XEXP (XEXP (x, 0), 0);
7631 if (GET_CODE (x) != SYMBOL_REF)
7632 return x;
7634 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7636 x = legitimize_tls_address (x, NULL_RTX);
7638 if (addend)
7640 x = gen_rtx_PLUS (SImode, x, addend);
7641 orig_x = x;
7643 else
7644 return x;
7647 if (!TARGET_ARM)
7649 /* TODO: legitimize_address for Thumb2. */
7650 if (TARGET_THUMB2)
7651 return x;
7652 return thumb_legitimize_address (x, orig_x, mode);
7655 if (GET_CODE (x) == PLUS)
7657 rtx xop0 = XEXP (x, 0);
7658 rtx xop1 = XEXP (x, 1);
7660 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7661 xop0 = force_reg (SImode, xop0);
7663 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7664 && !symbol_mentioned_p (xop1))
7665 xop1 = force_reg (SImode, xop1);
7667 if (ARM_BASE_REGISTER_RTX_P (xop0)
7668 && CONST_INT_P (xop1))
7670 HOST_WIDE_INT n, low_n;
7671 rtx base_reg, val;
7672 n = INTVAL (xop1);
7674 /* VFP addressing modes actually allow greater offsets, but for
7675 now we just stick with the lowest common denominator. */
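/* As an illustration of the split performed below (hypothetical
   offset): for a DImode access at offset 0x1e, low_n starts as 0xe;
   since that is greater than 4 we rebias to n = 0x20 and low_n = -2,
   so the base register is loaded with xop0 + 0x20 and the access
   itself uses offset -2.  */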
7676 if (mode == DImode
7677 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7679 low_n = n & 0x0f;
7680 n &= ~0x0f;
7681 if (low_n > 4)
7683 n += 16;
7684 low_n -= 16;
7687 else
7689 low_n = ((mode) == TImode ? 0
7690 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7691 n -= low_n;
7694 base_reg = gen_reg_rtx (SImode);
7695 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7696 emit_move_insn (base_reg, val);
7697 x = plus_constant (Pmode, base_reg, low_n);
7699 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7700 x = gen_rtx_PLUS (SImode, xop0, xop1);
7703 /* XXX We don't allow MINUS any more -- see comment in
7704 arm_legitimate_address_outer_p (). */
7705 else if (GET_CODE (x) == MINUS)
7707 rtx xop0 = XEXP (x, 0);
7708 rtx xop1 = XEXP (x, 1);
7710 if (CONSTANT_P (xop0))
7711 xop0 = force_reg (SImode, xop0);
7713 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7714 xop1 = force_reg (SImode, xop1);
7716 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7717 x = gen_rtx_MINUS (SImode, xop0, xop1);
7720 /* Make sure to take full advantage of the pre-indexed addressing mode
7721 with absolute addresses which often allows for the base register to
7722 be factorized for multiple adjacent memory references, and it might
7723 even allow the minipool to be avoided entirely. */
7724 else if (CONST_INT_P (x) && optimize > 0)
7726 unsigned int bits;
7727 HOST_WIDE_INT mask, base, index;
7728 rtx base_reg;
7730 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7731 use an 8-bit index. So let's use a 12-bit index for SImode only and
7732 hope that arm_gen_constant will enable ldrb to use more bits. */
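/* Illustrative example of the split below: for an SImode access to
   absolute address 0x1234, bits is 12, so base = 0x1000 and
   index = 0x234; 0x1000 has few bits set, so the base is kept as-is,
   loaded into a register, and the access becomes [base_reg, #0x234].  */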
7733 bits = (mode == SImode) ? 12 : 8;
7734 mask = (1 << bits) - 1;
7735 base = INTVAL (x) & ~mask;
7736 index = INTVAL (x) & mask;
7737 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7739 /* It'll most probably be more efficient to generate the base
7740 with more bits set and use a negative index instead. */
7741 base |= mask;
7742 index -= mask;
7744 base_reg = force_reg (SImode, GEN_INT (base));
7745 x = plus_constant (Pmode, base_reg, index);
7748 if (flag_pic)
7750 /* We need to find and carefully transform any SYMBOL and LABEL
7751 references; so go back to the original address expression. */
7752 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7754 if (new_x != orig_x)
7755 x = new_x;
7758 return x;
7762 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7763 to be legitimate. If we find one, return the new, valid address. */
7765 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7767 if (GET_CODE (x) == PLUS
7768 && CONST_INT_P (XEXP (x, 1))
7769 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7770 || INTVAL (XEXP (x, 1)) < 0))
7772 rtx xop0 = XEXP (x, 0);
7773 rtx xop1 = XEXP (x, 1);
7774 HOST_WIDE_INT offset = INTVAL (xop1);
7776 /* Try and fold the offset into a biasing of the base register and
7777 then offsetting that. Don't do this when optimizing for space
7778 since it can cause too many CSEs. */
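/* A worked example of the folding below (illustrative only): for an
   SImode access with offset 260, delta becomes 260 - (256 - 4) = 8,
   so the base register is advanced by 252 and the load then uses the
   in-range offset #8.  */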
7779 if (optimize_size && offset >= 0
7780 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7782 HOST_WIDE_INT delta;
7784 if (offset >= 256)
7785 delta = offset - (256 - GET_MODE_SIZE (mode));
7786 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7787 delta = 31 * GET_MODE_SIZE (mode);
7788 else
7789 delta = offset & (~31 * GET_MODE_SIZE (mode));
7791 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7792 NULL_RTX);
7793 x = plus_constant (Pmode, xop0, delta);
7795 else if (offset < 0 && offset > -256)
7796 /* Small negative offsets are best done with a subtract before the
7797 dereference; forcing these into a register normally takes two
7798 instructions. */
7799 x = force_operand (x, NULL_RTX);
7800 else
7802 /* For the remaining cases, force the constant into a register. */
7803 xop1 = force_reg (SImode, xop1);
7804 x = gen_rtx_PLUS (SImode, xop0, xop1);
7807 else if (GET_CODE (x) == PLUS
7808 && s_register_operand (XEXP (x, 1), SImode)
7809 && !s_register_operand (XEXP (x, 0), SImode))
7811 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7813 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7816 if (flag_pic)
7818 /* We need to find and carefully transform any SYMBOL and LABEL
7819 references; so go back to the original address expression. */
7820 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7822 if (new_x != orig_x)
7823 x = new_x;
7826 return x;
7829 bool
7830 arm_legitimize_reload_address (rtx *p,
7831 machine_mode mode,
7832 int opnum, int type,
7833 int ind_levels ATTRIBUTE_UNUSED)
7835 /* We must recognize output that we have already generated ourselves. */
7836 if (GET_CODE (*p) == PLUS
7837 && GET_CODE (XEXP (*p, 0)) == PLUS
7838 && REG_P (XEXP (XEXP (*p, 0), 0))
7839 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7840 && CONST_INT_P (XEXP (*p, 1)))
7842 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7843 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7844 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7845 return true;
7848 if (GET_CODE (*p) == PLUS
7849 && REG_P (XEXP (*p, 0))
7850 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7851 /* If the base register is equivalent to a constant, let the generic
7852 code handle it. Otherwise we will run into problems if a future
7853 reload pass decides to rematerialize the constant. */
7854 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7855 && CONST_INT_P (XEXP (*p, 1)))
7857 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7858 HOST_WIDE_INT low, high;
7860 /* Detect coprocessor load/stores. */
7861 bool coproc_p = ((TARGET_HARD_FLOAT
7862 && TARGET_VFP
7863 && (mode == SFmode || mode == DFmode))
7864 || (TARGET_REALLY_IWMMXT
7865 && VALID_IWMMXT_REG_MODE (mode))
7866 || (TARGET_NEON
7867 && (VALID_NEON_DREG_MODE (mode)
7868 || VALID_NEON_QREG_MODE (mode))));
7870 /* For some conditions, bail out when the low two bits of the offset are nonzero. */
7871 if ((val & 0x3) != 0
7872 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7873 && (coproc_p
7874 /* For DI, and DF under soft-float: */
7875 || ((mode == DImode || mode == DFmode)
7876 /* Without ldrd, we use stm/ldm, which does not
7877 fare well with unaligned offsets. */
7878 && (! TARGET_LDRD
7879 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7880 || TARGET_THUMB2))))
7881 return false;
7883 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7884 where the (reg+high) part gets turned into a reload add insn,
7885 we try to decompose the index into high/low values that can often
7886 also lead to better reload CSE.
7887 For example:
7888 ldr r0, [r2, #4100] // Offset too large
7889 ldr r1, [r2, #4104] // Offset too large
7891 is best reloaded as:
7892 add t1, r2, #4096
7893 ldr r0, [t1, #4]
7894 add t2, r2, #4096
7895 ldr r1, [t2, #8]
7897 which post-reload CSE can simplify in most cases to eliminate the
7898 second add instruction:
7899 add t1, r2, #4096
7900 ldr r0, [t1, #4]
7901 ldr r1, [t1, #8]
7903 The idea here is that we want to split out the bits of the constant
7904 as a mask, rather than by subtracting the maximum offset that the
7905 respective type of load/store used can handle.
7907 A negative low part can still be useful even if
7908 the overall offset is positive; sometimes this may lead to an immediate
7909 that can be constructed with fewer instructions.
7910 For example:
7911 ldr r0, [r2, #0x3FFFFC]
7913 This is best reloaded as:
7914 add t1, r2, #0x400000
7915 ldr r0, [t1, #-4]
7917 The trick for spotting this for a load insn with N bits of offset
7918 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7919 negative offset that is going to make bit N and all the bits below
7920 it become zero in the remainder part.
7922 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7923 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7924 used in most cases of ARM load/store instructions. */
7926 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7927 (((VAL) & ((1 << (N)) - 1)) \
7928 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7929 : 0)
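/* Worked example, matching the 0x3FFFFC case above: with VAL =
   0x3FFFFC and N = 10 the low 10 bits are nonzero, so the macro
   yields ((0x3FFFFC & 0x7FF) ^ 0x400) - 0x400 = -4; the remaining
   high part is 0x400000, giving the add / ldr #-4 sequence.  */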
7931 if (coproc_p)
7933 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7935 /* NEON quad-word load/stores are made of two double-word accesses,
7936 so the valid index range is reduced by 8. Treat as 9-bit range if
7937 we go over it. */
7938 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7939 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7941 else if (GET_MODE_SIZE (mode) == 8)
7943 if (TARGET_LDRD)
7944 low = (TARGET_THUMB2
7945 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7946 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7947 else
7948 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7949 to access doublewords. The supported load/store offsets are
7950 -8, -4, and 4, which we try to produce here. */
7951 low = ((val & 0xf) ^ 0x8) - 0x8;
7953 else if (GET_MODE_SIZE (mode) < 8)
7955 /* NEON element load/stores do not have an offset. */
7956 if (TARGET_NEON_FP16 && mode == HFmode)
7957 return false;
7959 if (TARGET_THUMB2)
7961 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7962 Try the wider 12-bit range first, and re-try if the result
7963 is out of range. */
7964 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7965 if (low < -255)
7966 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7968 else
7970 if (mode == HImode || mode == HFmode)
7972 if (arm_arch4)
7973 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7974 else
7976 /* The storehi/movhi_bytes fallbacks can use only
7977 [-4094,+4094] of the full ldrb/strb index range. */
7978 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7979 if (low == 4095 || low == -4095)
7980 return false;
7983 else
7984 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7987 else
7988 return false;
7990 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7991 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7992 - (unsigned HOST_WIDE_INT) 0x80000000);
7993 /* Check for overflow or zero */
7994 if (low == 0 || high == 0 || (high + low != val))
7995 return false;
7997 /* Reload the high part into a base reg; leave the low part
7998 in the mem.
7999 Note that replacing this gen_rtx_PLUS with plus_constant is
8000 wrong in this case because we rely on the
8001 (plus (plus reg c1) c2) structure being preserved so that
8002 XEXP (*p, 0) in push_reload below uses the correct term. */
8003 *p = gen_rtx_PLUS (GET_MODE (*p),
8004 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8005 GEN_INT (high)),
8006 GEN_INT (low));
8007 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8008 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8009 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8010 return true;
8013 return false;
8017 thumb_legitimize_reload_address (rtx *x_p,
8018 machine_mode mode,
8019 int opnum, int type,
8020 int ind_levels ATTRIBUTE_UNUSED)
8022 rtx x = *x_p;
8024 if (GET_CODE (x) == PLUS
8025 && GET_MODE_SIZE (mode) < 4
8026 && REG_P (XEXP (x, 0))
8027 && XEXP (x, 0) == stack_pointer_rtx
8028 && CONST_INT_P (XEXP (x, 1))
8029 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8031 rtx orig_x = x;
8033 x = copy_rtx (x);
8034 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8035 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8036 return x;
8039 /* If both registers are hi-regs, then it's better to reload the
8040 entire expression rather than each register individually. That
8041 only requires one reload register rather than two. */
8042 if (GET_CODE (x) == PLUS
8043 && REG_P (XEXP (x, 0))
8044 && REG_P (XEXP (x, 1))
8045 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8046 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8048 rtx orig_x = x;
8050 x = copy_rtx (x);
8051 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8052 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8053 return x;
8056 return NULL;
8059 /* Return TRUE if X contains any TLS symbol references. */
8061 bool
8062 arm_tls_referenced_p (rtx x)
8064 if (! TARGET_HAVE_TLS)
8065 return false;
8067 subrtx_iterator::array_type array;
8068 FOR_EACH_SUBRTX (iter, array, x, ALL)
8070 const_rtx x = *iter;
8071 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8072 return true;
8074 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8075 TLS offsets, not real symbol references. */
8076 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8077 iter.skip_subrtxes ();
8079 return false;
8082 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8084 On the ARM, allow any integer (invalid ones are removed later by insn
8085 patterns), nice doubles and symbol_refs which refer to the function's
8086 constant pool XXX.
8088 When generating PIC, allow anything. */
8090 static bool
8091 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8093 /* At present, we have no support for Neon structure constants, so forbid
8094 them here. It might be possible to handle simple cases like 0 and -1
8095 in future. */
8096 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8097 return false;
8099 return flag_pic || !label_mentioned_p (x);
8102 static bool
8103 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8105 return (CONST_INT_P (x)
8106 || CONST_DOUBLE_P (x)
8107 || CONSTANT_ADDRESS_P (x)
8108 || flag_pic);
8111 static bool
8112 arm_legitimate_constant_p (machine_mode mode, rtx x)
8114 return (!arm_cannot_force_const_mem (mode, x)
8115 && (TARGET_32BIT
8116 ? arm_legitimate_constant_p_1 (mode, x)
8117 : thumb_legitimate_constant_p (mode, x)));
8120 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8122 static bool
8123 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8125 rtx base, offset;
8127 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8129 split_const (x, &base, &offset);
8130 if (GET_CODE (base) == SYMBOL_REF
8131 && !offset_within_block_p (base, INTVAL (offset)))
8132 return true;
8134 return arm_tls_referenced_p (x);
8137 #define REG_OR_SUBREG_REG(X) \
8138 (REG_P (X) \
8139 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8141 #define REG_OR_SUBREG_RTX(X) \
8142 (REG_P (X) ? (X) : SUBREG_REG (X))
8144 static inline int
8145 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8147 machine_mode mode = GET_MODE (x);
8148 int total, words;
8150 switch (code)
8152 case ASHIFT:
8153 case ASHIFTRT:
8154 case LSHIFTRT:
8155 case ROTATERT:
8156 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8158 case PLUS:
8159 case MINUS:
8160 case COMPARE:
8161 case NEG:
8162 case NOT:
8163 return COSTS_N_INSNS (1);
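/* The constant-multiplier path in the MULT case below estimates cycles
   by consuming the multiplier two bits per iteration; e.g. a
   multiplier of 100 (seven significant bits) takes four iterations,
   giving COSTS_N_INSNS (2) + 4.  This is only a rough estimate.  */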
8165 case MULT:
8166 if (CONST_INT_P (XEXP (x, 1)))
8168 int cycles = 0;
8169 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8171 while (i)
8173 i >>= 2;
8174 cycles++;
8176 return COSTS_N_INSNS (2) + cycles;
8178 return COSTS_N_INSNS (1) + 16;
8180 case SET:
8181 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8182 the mode. */
8183 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8184 return (COSTS_N_INSNS (words)
8185 + 4 * ((MEM_P (SET_SRC (x)))
8186 + MEM_P (SET_DEST (x))));
8188 case CONST_INT:
8189 if (outer == SET)
8191 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8192 return 0;
8193 if (thumb_shiftable_const (INTVAL (x)))
8194 return COSTS_N_INSNS (2);
8195 return COSTS_N_INSNS (3);
8197 else if ((outer == PLUS || outer == COMPARE)
8198 && INTVAL (x) < 256 && INTVAL (x) > -256)
8199 return 0;
8200 else if ((outer == IOR || outer == XOR || outer == AND)
8201 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8202 return COSTS_N_INSNS (1);
8203 else if (outer == AND)
8205 int i;
8206 /* This duplicates the tests in the andsi3 expander. */
8207 for (i = 9; i <= 31; i++)
8208 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8209 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8210 return COSTS_N_INSNS (2);
8212 else if (outer == ASHIFT || outer == ASHIFTRT
8213 || outer == LSHIFTRT)
8214 return 0;
8215 return COSTS_N_INSNS (2);
8217 case CONST:
8218 case CONST_DOUBLE:
8219 case LABEL_REF:
8220 case SYMBOL_REF:
8221 return COSTS_N_INSNS (3);
8223 case UDIV:
8224 case UMOD:
8225 case DIV:
8226 case MOD:
8227 return 100;
8229 case TRUNCATE:
8230 return 99;
8232 case AND:
8233 case XOR:
8234 case IOR:
8235 /* XXX guess. */
8236 return 8;
8238 case MEM:
8239 /* XXX another guess. */
8240 /* Memory costs quite a lot for the first word, but subsequent words
8241 load at the equivalent of a single insn each. */
8242 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8243 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8244 ? 4 : 0));
8246 case IF_THEN_ELSE:
8247 /* XXX a guess. */
8248 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8249 return 14;
8250 return 2;
8252 case SIGN_EXTEND:
8253 case ZERO_EXTEND:
8254 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8255 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8257 if (mode == SImode)
8258 return total;
8260 if (arm_arch6)
8261 return total + COSTS_N_INSNS (1);
8263 /* Assume a two-shift sequence. Increase the cost slightly so
8264 we prefer actual shifts over an extend operation. */
8265 return total + 1 + COSTS_N_INSNS (2);
8267 default:
8268 return 99;
8272 static inline bool
8273 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8275 machine_mode mode = GET_MODE (x);
8276 enum rtx_code subcode;
8277 rtx operand;
8278 enum rtx_code code = GET_CODE (x);
8279 *total = 0;
8281 switch (code)
8283 case MEM:
8284 /* Memory costs quite a lot for the first word, but subsequent words
8285 load at the equivalent of a single insn each. */
8286 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8287 return true;
8289 case DIV:
8290 case MOD:
8291 case UDIV:
8292 case UMOD:
8293 if (TARGET_HARD_FLOAT && mode == SFmode)
8294 *total = COSTS_N_INSNS (2);
8295 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8296 *total = COSTS_N_INSNS (4);
8297 else
8298 *total = COSTS_N_INSNS (20);
8299 return false;
8301 case ROTATE:
8302 if (REG_P (XEXP (x, 1)))
8303 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8304 else if (!CONST_INT_P (XEXP (x, 1)))
8305 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8307 /* Fall through */
8308 case ROTATERT:
8309 if (mode != SImode)
8311 *total += COSTS_N_INSNS (4);
8312 return true;
8315 /* Fall through */
8316 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8317 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8318 if (mode == DImode)
8320 *total += COSTS_N_INSNS (3);
8321 return true;
8324 *total += COSTS_N_INSNS (1);
8325 /* Increase the cost of complex shifts because they aren't any faster,
8326 and they reduce dual-issue opportunities. */
8327 if (arm_tune_cortex_a9
8328 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8329 ++*total;
8331 return true;
8333 case MINUS:
8334 if (mode == DImode)
8336 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8337 if (CONST_INT_P (XEXP (x, 0))
8338 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8340 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8341 return true;
8344 if (CONST_INT_P (XEXP (x, 1))
8345 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8348 return true;
8351 return false;
8354 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8356 if (TARGET_HARD_FLOAT
8357 && (mode == SFmode
8358 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8360 *total = COSTS_N_INSNS (1);
8361 if (CONST_DOUBLE_P (XEXP (x, 0))
8362 && arm_const_double_rtx (XEXP (x, 0)))
8364 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8365 return true;
8368 if (CONST_DOUBLE_P (XEXP (x, 1))
8369 && arm_const_double_rtx (XEXP (x, 1)))
8371 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8372 return true;
8375 return false;
8377 *total = COSTS_N_INSNS (20);
8378 return false;
8381 *total = COSTS_N_INSNS (1);
8382 if (CONST_INT_P (XEXP (x, 0))
8383 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8385 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8386 return true;
8389 subcode = GET_CODE (XEXP (x, 1));
8390 if (subcode == ASHIFT || subcode == ASHIFTRT
8391 || subcode == LSHIFTRT
8392 || subcode == ROTATE || subcode == ROTATERT)
8394 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8396 return true;
8399 /* A shift as a part of RSB costs no more than RSB itself. */
8400 if (GET_CODE (XEXP (x, 0)) == MULT
8401 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8404 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8405 return true;
8408 if (subcode == MULT
8409 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8411 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8412 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8413 return true;
8416 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8417 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8419 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8420 if (REG_P (XEXP (XEXP (x, 1), 0))
8421 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8422 *total += COSTS_N_INSNS (1);
8424 return true;
8427 /* Fall through */
8429 case PLUS:
8430 if (code == PLUS && arm_arch6 && mode == SImode
8431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8434 *total = COSTS_N_INSNS (1);
8435 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8436 0, speed);
8437 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8438 return true;
8441 /* MLA: All arguments must be registers. We filter out
8442 multiplication by a power of two, so that we fall down into
8443 the code below. */
8444 if (GET_CODE (XEXP (x, 0)) == MULT
8445 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8447 /* The cost comes from the cost of the multiply. */
8448 return false;
8451 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8453 if (TARGET_HARD_FLOAT
8454 && (mode == SFmode
8455 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8457 *total = COSTS_N_INSNS (1);
8458 if (CONST_DOUBLE_P (XEXP (x, 1))
8459 && arm_const_double_rtx (XEXP (x, 1)))
8461 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8462 return true;
8465 return false;
8468 *total = COSTS_N_INSNS (20);
8469 return false;
8472 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8473 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8475 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8476 if (REG_P (XEXP (XEXP (x, 0), 0))
8477 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8478 *total += COSTS_N_INSNS (1);
8479 return true;
8482 /* Fall through */
8484 case AND: case XOR: case IOR:
8486 /* Normally the frame registers will be spilt into reg+const during
8487 reload, so it is a bad idea to combine them with other instructions,
8488 since then they might not be moved outside of loops. As a compromise
8489 we allow integration with ops that have a constant as their second
8490 operand. */
8491 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8492 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8493 && !CONST_INT_P (XEXP (x, 1)))
8494 *total = COSTS_N_INSNS (1);
8496 if (mode == DImode)
8498 *total += COSTS_N_INSNS (2);
8499 if (CONST_INT_P (XEXP (x, 1))
8500 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8502 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8503 return true;
8506 return false;
8509 *total += COSTS_N_INSNS (1);
8510 if (CONST_INT_P (XEXP (x, 1))
8511 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8513 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8514 return true;
8516 subcode = GET_CODE (XEXP (x, 0));
8517 if (subcode == ASHIFT || subcode == ASHIFTRT
8518 || subcode == LSHIFTRT
8519 || subcode == ROTATE || subcode == ROTATERT)
8521 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8522 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8523 return true;
8526 if (subcode == MULT
8527 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8529 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8530 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8531 return true;
8534 if (subcode == UMIN || subcode == UMAX
8535 || subcode == SMIN || subcode == SMAX)
8537 *total = COSTS_N_INSNS (3);
8538 return true;
8541 return false;
8543 case MULT:
8544 /* This should have been handled by the CPU specific routines. */
8545 gcc_unreachable ();
8547 case TRUNCATE:
8548 if (arm_arch3m && mode == SImode
8549 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8550 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8551 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8552 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8553 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8554 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8556 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8557 return true;
8559 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8560 return false;
8562 case NEG:
8563 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8565 if (TARGET_HARD_FLOAT
8566 && (mode == SFmode
8567 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8569 *total = COSTS_N_INSNS (1);
8570 return false;
8572 *total = COSTS_N_INSNS (2);
8573 return false;
8576 /* Fall through */
8577 case NOT:
8578 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8579 if (mode == SImode && code == NOT)
8581 subcode = GET_CODE (XEXP (x, 0));
8582 if (subcode == ASHIFT || subcode == ASHIFTRT
8583 || subcode == LSHIFTRT
8584 || subcode == ROTATE || subcode == ROTATERT
8585 || (subcode == MULT
8586 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8588 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8589 /* Register shifts cost an extra cycle. */
8590 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8591 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8592 subcode, 1, speed);
8593 return true;
8597 return false;
8599 case IF_THEN_ELSE:
8600 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8602 *total = COSTS_N_INSNS (4);
8603 return true;
8606 operand = XEXP (x, 0);
8608 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8609 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8610 && REG_P (XEXP (operand, 0))
8611 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8612 *total += COSTS_N_INSNS (1);
8613 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8614 + rtx_cost (XEXP (x, 2), code, 2, speed));
8615 return true;
8617 case NE:
8618 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8620 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8621 return true;
8623 goto scc_insn;
8625 case GE:
8626 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8627 && mode == SImode && XEXP (x, 1) == const0_rtx)
8629 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8630 return true;
8632 goto scc_insn;
8634 case LT:
8635 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8636 && mode == SImode && XEXP (x, 1) == const0_rtx)
8638 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8639 return true;
8641 goto scc_insn;
8643 case EQ:
8644 case GT:
8645 case LE:
8646 case GEU:
8647 case LTU:
8648 case GTU:
8649 case LEU:
8650 case UNORDERED:
8651 case ORDERED:
8652 case UNEQ:
8653 case UNGE:
8654 case UNLT:
8655 case UNGT:
8656 case UNLE:
8657 scc_insn:
8658 /* SCC insns. If the comparison has already been
8659 performed, they cost 2 instructions. Otherwise they need
8660 an additional comparison before them. */
8661 *total = COSTS_N_INSNS (2);
8662 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8664 return true;
8667 /* Fall through */
8668 case COMPARE:
8669 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8671 *total = 0;
8672 return true;
8675 *total += COSTS_N_INSNS (1);
8676 if (CONST_INT_P (XEXP (x, 1))
8677 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8679 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8680 return true;
8683 subcode = GET_CODE (XEXP (x, 0));
8684 if (subcode == ASHIFT || subcode == ASHIFTRT
8685 || subcode == LSHIFTRT
8686 || subcode == ROTATE || subcode == ROTATERT)
8688 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8689 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8690 return true;
8693 if (subcode == MULT
8694 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8696 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8697 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8698 return true;
8701 return false;
8703 case UMIN:
8704 case UMAX:
8705 case SMIN:
8706 case SMAX:
8707 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8708 if (!CONST_INT_P (XEXP (x, 1))
8709 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8710 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8711 return true;
8713 case ABS:
8714 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8716 if (TARGET_HARD_FLOAT
8717 && (mode == SFmode
8718 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8720 *total = COSTS_N_INSNS (1);
8721 return false;
8723 *total = COSTS_N_INSNS (20);
8724 return false;
8726 *total = COSTS_N_INSNS (1);
8727 if (mode == DImode)
8728 *total += COSTS_N_INSNS (3);
8729 return false;
8731 case SIGN_EXTEND:
8732 case ZERO_EXTEND:
8733 *total = 0;
8734 if (GET_MODE_CLASS (mode) == MODE_INT)
8736 rtx op = XEXP (x, 0);
8737 machine_mode opmode = GET_MODE (op);
8739 if (mode == DImode)
8740 *total += COSTS_N_INSNS (1);
8742 if (opmode != SImode)
8744 if (MEM_P (op))
8746 /* If !arm_arch4, we use one of the extendhisi2_mem
8747 or movhi_bytes patterns for HImode. For a QImode
8748 sign extension, we first zero-extend from memory
8749 and then perform a shift sequence. */
8750 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8751 *total += COSTS_N_INSNS (2);
8753 else if (arm_arch6)
8754 *total += COSTS_N_INSNS (1);
8756 /* We don't have the necessary insn, so we need to perform some
8757 other operation. */
8758 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8759 /* An and with constant 255. */
8760 *total += COSTS_N_INSNS (1);
8761 else
8762 /* A shift sequence. Increase costs slightly to avoid
8763 combining two shifts into an extend operation. */
8764 *total += COSTS_N_INSNS (2) + 1;
8767 return false;
8770 switch (GET_MODE (XEXP (x, 0)))
8772 case V8QImode:
8773 case V4HImode:
8774 case V2SImode:
8775 case V4QImode:
8776 case V2HImode:
8777 *total = COSTS_N_INSNS (1);
8778 return false;
8780 default:
8781 gcc_unreachable ();
8783 gcc_unreachable ();
8785 case ZERO_EXTRACT:
8786 case SIGN_EXTRACT:
8787 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8788 return true;
8790 case CONST_INT:
8791 if (const_ok_for_arm (INTVAL (x))
8792 || const_ok_for_arm (~INTVAL (x)))
8793 *total = COSTS_N_INSNS (1);
8794 else
8795 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8796 INTVAL (x), NULL_RTX,
8797 NULL_RTX, 0, 0));
8798 return true;
8800 case CONST:
8801 case LABEL_REF:
8802 case SYMBOL_REF:
8803 *total = COSTS_N_INSNS (3);
8804 return true;
8806 case HIGH:
8807 *total = COSTS_N_INSNS (1);
8808 return true;
8810 case LO_SUM:
8811 *total = COSTS_N_INSNS (1);
8812 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8813 return true;
8815 case CONST_DOUBLE:
8816 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8817 && (mode == SFmode || !TARGET_VFP_SINGLE))
8818 *total = COSTS_N_INSNS (1);
8819 else
8820 *total = COSTS_N_INSNS (4);
8821 return true;
8823 case SET:
8824 /* The vec_extract patterns accept memory operands that require an
8825 address reload. Account for the cost of that reload to give the
8826 auto-inc-dec pass an incentive to try to replace them. */
8827 if (TARGET_NEON && MEM_P (SET_DEST (x))
8828 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8830 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8831 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8832 *total += COSTS_N_INSNS (1);
8833 return true;
8835 /* Likewise for the vec_set patterns. */
8836 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8837 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8838 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8840 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8841 *total = rtx_cost (mem, code, 0, speed);
8842 if (!neon_vector_mem_operand (mem, 2, true))
8843 *total += COSTS_N_INSNS (1);
8844 return true;
8846 return false;
8848 case UNSPEC:
8849 /* We cost this as high as our memory costs to allow this to
8850 be hoisted from loops. */
8851 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8853 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8855 return true;
8857 case CONST_VECTOR:
8858 if (TARGET_NEON
8859 && TARGET_HARD_FLOAT
8860 && outer == SET
8861 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8862 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8863 *total = COSTS_N_INSNS (1);
8864 else
8865 *total = COSTS_N_INSNS (4);
8866 return true;
8868 default:
8869 *total = COSTS_N_INSNS (4);
8870 return false;
8874 /* Estimates the size cost of thumb1 instructions.
8875 For now most of the code is copied from thumb1_rtx_costs. We need more
8876 fine-grained tuning when we have more related test cases. */
8877 static inline int
8878 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8880 machine_mode mode = GET_MODE (x);
8881 int words;
8883 switch (code)
8885 case ASHIFT:
8886 case ASHIFTRT:
8887 case LSHIFTRT:
8888 case ROTATERT:
8889 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8891 case PLUS:
8892 case MINUS:
8893 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8894 patterns generated by RTL expansion, especially for the expansion of
8895 multiplication. */
8896 if ((GET_CODE (XEXP (x, 0)) == MULT
8897 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8898 || (GET_CODE (XEXP (x, 1)) == MULT
8899 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8900 return COSTS_N_INSNS (2);
8901 /* Fall through on purpose for a normal RTX. */
8902 case COMPARE:
8903 case NEG:
8904 case NOT:
8905 return COSTS_N_INSNS (1);
8907 case MULT:
8908 if (CONST_INT_P (XEXP (x, 1)))
8910 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8911 into a register first. */
8912 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8913 /* For the targets which have a very small and high-latency multiply
8914 unit, we prefer to synthesize the mult with up to 5 instructions,
8915 giving a good balance between size and performance. */
8916 if (arm_arch6m && arm_m_profile_small_mul)
8917 return COSTS_N_INSNS (5);
8918 else
8919 return COSTS_N_INSNS (1) + const_size;
8921 return COSTS_N_INSNS (1);
8923 case SET:
8924 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8925 the mode. */
8926 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8927 return COSTS_N_INSNS (words)
8928 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8929 || satisfies_constraint_K (SET_SRC (x))
8930 /* thumb1_movdi_insn. */
8931 || ((words > 1) && MEM_P (SET_SRC (x))));
8933 case CONST_INT:
8934 if (outer == SET)
8936 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8937 return COSTS_N_INSNS (1);
8938 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8939 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8940 return COSTS_N_INSNS (2);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8942 if (thumb_shiftable_const (INTVAL (x)))
8943 return COSTS_N_INSNS (2);
8944 return COSTS_N_INSNS (3);
8946 else if ((outer == PLUS || outer == COMPARE)
8947 && INTVAL (x) < 256 && INTVAL (x) > -256)
8948 return 0;
8949 else if ((outer == IOR || outer == XOR || outer == AND)
8950 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8951 return COSTS_N_INSNS (1);
8952 else if (outer == AND)
8954 int i;
8955 /* This duplicates the tests in the andsi3 expander. */
8956 for (i = 9; i <= 31; i++)
8957 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8958 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8959 return COSTS_N_INSNS (2);
8961 else if (outer == ASHIFT || outer == ASHIFTRT
8962 || outer == LSHIFTRT)
8963 return 0;
8964 return COSTS_N_INSNS (2);
8966 case CONST:
8967 case CONST_DOUBLE:
8968 case LABEL_REF:
8969 case SYMBOL_REF:
8970 return COSTS_N_INSNS (3);
8972 case UDIV:
8973 case UMOD:
8974 case DIV:
8975 case MOD:
8976 return 100;
8978 case TRUNCATE:
8979 return 99;
8981 case AND:
8982 case XOR:
8983 case IOR:
8984 return COSTS_N_INSNS (1);
8986 case MEM:
8987 return (COSTS_N_INSNS (1)
8988 + COSTS_N_INSNS (1)
8989 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8990 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8991 ? COSTS_N_INSNS (1) : 0));
8993 case IF_THEN_ELSE:
8994 /* XXX a guess. */
8995 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8996 return 14;
8997 return 2;
8999 case ZERO_EXTEND:
9000 /* XXX still guessing. */
9001 switch (GET_MODE (XEXP (x, 0)))
9003 case QImode:
9004 return (1 + (mode == DImode ? 4 : 0)
9005 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9007 case HImode:
9008 return (4 + (mode == DImode ? 4 : 0)
9009 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9011 case SImode:
9012 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9014 default:
9015 return 99;
9018 default:
9019 return 99;
9023 /* RTX costs when optimizing for size. */
9024 static bool
9025 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9026 int *total)
9028 machine_mode mode = GET_MODE (x);
9029 if (TARGET_THUMB1)
9031 *total = thumb1_size_rtx_costs (x, code, outer_code);
9032 return true;
9035 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9036 switch (code)
9038 case MEM:
9039 /* A memory access costs 1 insn if the mode is small or the address is
9040 a single register; otherwise it costs one insn per word. */
9041 if (REG_P (XEXP (x, 0)))
9042 *total = COSTS_N_INSNS (1);
9043 else if (flag_pic
9044 && GET_CODE (XEXP (x, 0)) == PLUS
9045 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9046 /* This will be split into two instructions.
9047 See arm.md:calculate_pic_address. */
9048 *total = COSTS_N_INSNS (2);
9049 else
9050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9051 return true;
9053 case DIV:
9054 case MOD:
9055 case UDIV:
9056 case UMOD:
9057 /* Needs a libcall, so it costs about this. */
9058 *total = COSTS_N_INSNS (2);
9059 return false;
9061 case ROTATE:
9062 if (mode == SImode && REG_P (XEXP (x, 1)))
9064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9065 return true;
9067 /* Fall through */
9068 case ROTATERT:
9069 case ASHIFT:
9070 case LSHIFTRT:
9071 case ASHIFTRT:
9072 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9074 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9075 return true;
9077 else if (mode == SImode)
9079 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9080 /* Slightly disparage register shifts, but not by much. */
9081 if (!CONST_INT_P (XEXP (x, 1)))
9082 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9083 return true;
9086 /* Needs a libcall. */
9087 *total = COSTS_N_INSNS (2);
9088 return false;
9090 case MINUS:
9091 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9092 && (mode == SFmode || !TARGET_VFP_SINGLE))
9094 *total = COSTS_N_INSNS (1);
9095 return false;
9098 if (mode == SImode)
9100 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9101 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9103 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9104 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9105 || subcode1 == ROTATE || subcode1 == ROTATERT
9106 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9107 || subcode1 == ASHIFTRT)
9109 /* It's just the cost of the two operands. */
9110 *total = 0;
9111 return false;
9114 *total = COSTS_N_INSNS (1);
9115 return false;
9118 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9119 return false;
9121 case PLUS:
9122 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9123 && (mode == SFmode || !TARGET_VFP_SINGLE))
9125 *total = COSTS_N_INSNS (1);
9126 return false;
9129 /* A shift as a part of ADD costs nothing. */
9130 if (GET_CODE (XEXP (x, 0)) == MULT
9131 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9133 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9134 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9135 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9136 return true;
9139 /* Fall through */
9140 case AND: case XOR: case IOR:
9141 if (mode == SImode)
9143 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9145 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9146 || subcode == LSHIFTRT || subcode == ASHIFTRT
9147 || (code == AND && subcode == NOT))
9149 /* It's just the cost of the two operands. */
9150 *total = 0;
9151 return false;
9155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9156 return false;
9158 case MULT:
9159 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9160 return false;
9162 case NEG:
9163 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9164 && (mode == SFmode || !TARGET_VFP_SINGLE))
9166 *total = COSTS_N_INSNS (1);
9167 return false;
9170 /* Fall through */
9171 case NOT:
9172 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case IF_THEN_ELSE:
9177 *total = 0;
9178 return false;
9180 case COMPARE:
9181 if (cc_register (XEXP (x, 0), VOIDmode))
9182 * total = 0;
9183 else
9184 *total = COSTS_N_INSNS (1);
9185 return false;
9187 case ABS:
9188 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9189 && (mode == SFmode || !TARGET_VFP_SINGLE))
9190 *total = COSTS_N_INSNS (1);
9191 else
9192 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9193 return false;
9195 case SIGN_EXTEND:
9196 case ZERO_EXTEND:
9197 return arm_rtx_costs_1 (x, outer_code, total, 0);
9199 case CONST_INT:
9200 if (const_ok_for_arm (INTVAL (x)))
9201 /* A multiplication by a constant requires another instruction
9202 to load the constant into a register. */
9203 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9204 ? 1 : 0);
9205 else if (const_ok_for_arm (~INTVAL (x)))
9206 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9207 else if (const_ok_for_arm (-INTVAL (x)))
9209 if (outer_code == COMPARE || outer_code == PLUS
9210 || outer_code == MINUS)
9211 *total = 0;
9212 else
9213 *total = COSTS_N_INSNS (1);
9215 else
9216 *total = COSTS_N_INSNS (2);
9217 return true;
9219 case CONST:
9220 case LABEL_REF:
9221 case SYMBOL_REF:
9222 *total = COSTS_N_INSNS (2);
9223 return true;
9225 case CONST_DOUBLE:
9226 *total = COSTS_N_INSNS (4);
9227 return true;
9229 case CONST_VECTOR:
9230 if (TARGET_NEON
9231 && TARGET_HARD_FLOAT
9232 && outer_code == SET
9233 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9234 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9235 *total = COSTS_N_INSNS (1);
9236 else
9237 *total = COSTS_N_INSNS (4);
9238 return true;
9240 case HIGH:
9241 case LO_SUM:
9242 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9243 cost of these slightly. */
9244 *total = COSTS_N_INSNS (1) + 1;
9245 return true;
9247 case SET:
9248 return false;
9250 default:
9251 if (mode != VOIDmode)
9252 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9253 else
9254 *total = COSTS_N_INSNS (4); /* Who knows? */
9255 return false;
9259 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9260 operand, then return the operand that is being shifted. If the shift
9261 is not by a constant, then set SHIFT_REG to point to the operand.
9262 Return NULL if OP is not a shifter operand. */
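/* For example, for (mult reg (const_int 8)) this returns REG (a shift
   left by log2 (8)), while for (ashift reg1 reg2) it returns REG1 and
   sets *SHIFT_REG to REG2.  */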
9263 static rtx
9264 shifter_op_p (rtx op, rtx *shift_reg)
9266 enum rtx_code code = GET_CODE (op);
9268 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9269 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9270 return XEXP (op, 0);
9271 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9272 return XEXP (op, 0);
9273 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9274 || code == ASHIFTRT)
9276 if (!CONST_INT_P (XEXP (op, 1)))
9277 *shift_reg = XEXP (op, 1);
9278 return XEXP (op, 0);
9281 return NULL;
9284 static bool
9285 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9287 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9288 gcc_assert (GET_CODE (x) == UNSPEC);
9290 switch (XINT (x, 1))
9292 case UNSPEC_UNALIGNED_LOAD:
9293 /* We can only do unaligned loads into the integer unit, and we can't
9294 use LDM or LDRD. */
9295 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9296 if (speed_p)
9297 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9298 + extra_cost->ldst.load_unaligned);
9300 #ifdef NOT_YET
9301 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9302 ADDR_SPACE_GENERIC, speed_p);
9303 #endif
9304 return true;
9306 case UNSPEC_UNALIGNED_STORE:
9307 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9308 if (speed_p)
9309 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9310 + extra_cost->ldst.store_unaligned);
9312 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9313 #ifdef NOT_YET
9314 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9315 ADDR_SPACE_GENERIC, speed_p);
9316 #endif
9317 return true;
9319 case UNSPEC_VRINTZ:
9320 case UNSPEC_VRINTP:
9321 case UNSPEC_VRINTM:
9322 case UNSPEC_VRINTR:
9323 case UNSPEC_VRINTX:
9324 case UNSPEC_VRINTA:
9325 *cost = COSTS_N_INSNS (1);
9326 if (speed_p)
9327 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9329 return true;
9330 default:
9331 *cost = COSTS_N_INSNS (2);
9332 break;
9334 return false;
9337 /* Cost of a libcall. We assume one insn per argument, an amount for the
9338 call (one insn for -Os) and then one for processing the result. */
9339 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
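/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) at -Os.  */

/* If operand IDX of X is a left shift (or an equivalent multiply by a
   power of two), add the cost of a combined arithmetic-and-shift
   operation plus the operand costs, and return true from the
   enclosing cost case.  */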
9341 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9342 do \
9344 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9345 if (shift_op != NULL \
9346 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9348 if (shift_reg) \
9350 if (speed_p) \
9351 *cost += extra_cost->alu.arith_shift_reg; \
9352 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9354 else if (speed_p) \
9355 *cost += extra_cost->alu.arith_shift; \
9357 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9358 + rtx_cost (XEXP (x, 1 - IDX), \
9359 OP, 1, speed_p)); \
9360 return true; \
9363 while (0);
9365 /* RTX costs. Make an estimate of the cost of executing the operation
9366 X, which is contained within an operation with code OUTER_CODE.
9367 SPEED_P indicates whether the cost desired is the performance cost,
9368 or the size cost. The estimate is stored in COST and the return
9369 value is TRUE if the cost calculation is final, or FALSE if the
9370 caller should recurse through the operands of X to add additional
9371 costs.
9373 We currently make no attempt to model the size savings of Thumb-2
9374 16-bit instructions. At the normal points in compilation where
9375 this code is called we have no measure of whether the condition
9376 flags are live or not, and thus no realistic way to determine what
9377 the size will eventually be. */
9378 static bool
9379 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9380 const struct cpu_cost_table *extra_cost,
9381 int *cost, bool speed_p)
9383 machine_mode mode = GET_MODE (x);
9385 if (TARGET_THUMB1)
9387 if (speed_p)
9388 *cost = thumb1_rtx_costs (x, code, outer_code);
9389 else
9390 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9391 return true;
9394 switch (code)
9396 case SET:
9397 *cost = 0;
9398 /* SET RTXs don't have a mode so we get it from the destination. */
9399 mode = GET_MODE (SET_DEST (x));
9401 if (REG_P (SET_SRC (x))
9402 && REG_P (SET_DEST (x)))
9404 /* Assume that most copies can be done with a single insn,
9405 unless we don't have HW FP, in which case everything
9406 larger than word mode will require two insns. */
9407 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9408 && GET_MODE_SIZE (mode) > 4)
9409 || mode == DImode)
9410 ? 2 : 1);
9411 /* Conditional register moves can be encoded
9412 in 16 bits in Thumb mode. */
9413 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9414 *cost >>= 1;
9416 return true;
9419 if (CONST_INT_P (SET_SRC (x)))
9421 /* Handle CONST_INT here, since the value doesn't have a mode
9422 and we would otherwise be unable to work out the true cost. */
9423 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9424 outer_code = SET;
9425 /* Slightly lower the cost of setting a core reg to a constant.
9426 This helps break up chains and allows for better scheduling. */
9427 if (REG_P (SET_DEST (x))
9428 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9429 *cost -= 1;
9430 x = SET_SRC (x);
9431 /* Immediate moves with an immediate in the range [0, 255] can be
9432 encoded in 16 bits in Thumb mode. */
9433 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9434 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9435 *cost >>= 1;
9436 goto const_int_cost;
9439 return false;
9441 case MEM:
9442 /* A memory access costs 1 insn if the mode is small or the address is
9443 a single register; otherwise it costs one insn per word. */
9444 if (REG_P (XEXP (x, 0)))
9445 *cost = COSTS_N_INSNS (1);
9446 else if (flag_pic
9447 && GET_CODE (XEXP (x, 0)) == PLUS
9448 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9449 /* This will be split into two instructions.
9450 See arm.md:calculate_pic_address. */
9451 *cost = COSTS_N_INSNS (2);
9452 else
9453 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9455 /* For speed optimizations, add the costs of the address and
9456 accessing memory. */
9457 if (speed_p)
9458 #ifdef NOT_YET
9459 *cost += (extra_cost->ldst.load
9460 + arm_address_cost (XEXP (x, 0), mode,
9461 ADDR_SPACE_GENERIC, speed_p));
9462 #else
9463 *cost += extra_cost->ldst.load;
9464 #endif
9465 return true;
9467 case PARALLEL:
9469 /* Calculations of LDM costs are complex. We assume an initial cost
9470 (ldm_1st) which covers loading up to
9471 ldm_regs_per_insn_1st registers; each additional group of
9472 ldm_regs_per_insn_subsequent registers then costs one more insn. The
9473 formula for N regs is thus:
9475 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9476 + ldm_regs_per_insn_subsequent - 1)
9477 / ldm_regs_per_insn_subsequent).
9479 Additional costs may also be added for addressing. A similar
9480 formula is used for STM. */
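/* Worked example (illustrative numbers only): with ldm_regs_per_insn_1st = 3,
   ldm_regs_per_insn_subsequent = 2 and a PARALLEL loading N = 7 registers,
   the formula gives ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */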
9482 bool is_ldm = load_multiple_operation (x, SImode);
9483 bool is_stm = store_multiple_operation (x, SImode);
9485 *cost = COSTS_N_INSNS (1);
9487 if (is_ldm || is_stm)
9489 if (speed_p)
9491 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9492 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9493 ? extra_cost->ldst.ldm_regs_per_insn_1st
9494 : extra_cost->ldst.stm_regs_per_insn_1st;
9495 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9497 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9499 *cost += regs_per_insn_1st
9500 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9501 + regs_per_insn_sub - 1)
9502 / regs_per_insn_sub);
9503 return true;
9507 return false;
9509 case DIV:
9510 case UDIV:
9511 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9512 && (mode == SFmode || !TARGET_VFP_SINGLE))
9513 *cost = COSTS_N_INSNS (speed_p
9514 ? extra_cost->fp[mode != SFmode].div : 1);
9515 else if (mode == SImode && TARGET_IDIV)
9516 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9517 else
9518 *cost = LIBCALL_COST (2);
9519 return false; /* All arguments must be in registers. */
9521 case MOD:
9522 case UMOD:
9523 *cost = LIBCALL_COST (2);
9524 return false; /* All arguments must be in registers. */
9526 case ROTATE:
9527 if (mode == SImode && REG_P (XEXP (x, 1)))
9529 *cost = (COSTS_N_INSNS (2)
9530 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9531 if (speed_p)
9532 *cost += extra_cost->alu.shift_reg;
9533 return true;
9535 /* Fall through */
9536 case ROTATERT:
9537 case ASHIFT:
9538 case LSHIFTRT:
9539 case ASHIFTRT:
9540 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9542 *cost = (COSTS_N_INSNS (3)
9543 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9544 if (speed_p)
9545 *cost += 2 * extra_cost->alu.shift;
9546 return true;
9548 else if (mode == SImode)
9550 *cost = (COSTS_N_INSNS (1)
9551 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9552 /* Slightly disparage register shifts at -Os, but not by much. */
9553 if (!CONST_INT_P (XEXP (x, 1)))
9554 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9555 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9556 return true;
9558 else if (GET_MODE_CLASS (mode) == MODE_INT
9559 && GET_MODE_SIZE (mode) < 4)
9561 if (code == ASHIFT)
9563 *cost = (COSTS_N_INSNS (1)
9564 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9565 /* Slightly disparage register shifts at -Os, but not by
9566 much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9569 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9571 else if (code == LSHIFTRT || code == ASHIFTRT)
9573 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9575 /* Can use SBFX/UBFX. */
9576 *cost = COSTS_N_INSNS (1);
9577 if (speed_p)
9578 *cost += extra_cost->alu.bfx;
9579 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9581 else
9583 *cost = COSTS_N_INSNS (2);
9584 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9585 if (speed_p)
9587 if (CONST_INT_P (XEXP (x, 1)))
9588 *cost += 2 * extra_cost->alu.shift;
9589 else
9590 *cost += (extra_cost->alu.shift
9591 + extra_cost->alu.shift_reg);
9593 else
9594 /* Slightly disparage register shifts. */
9595 *cost += !CONST_INT_P (XEXP (x, 1));
9598 else /* Rotates. */
9600 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9602 if (speed_p)
9604 if (CONST_INT_P (XEXP (x, 1)))
9605 *cost += (2 * extra_cost->alu.shift
9606 + extra_cost->alu.log_shift);
9607 else
9608 *cost += (extra_cost->alu.shift
9609 + extra_cost->alu.shift_reg
9610 + extra_cost->alu.log_shift_reg);
9613 return true;
9616 *cost = LIBCALL_COST (2);
9617 return false;
9619 case BSWAP:
9620 if (arm_arch6)
9622 if (mode == SImode)
9624 *cost = COSTS_N_INSNS (1);
9625 if (speed_p)
9626 *cost += extra_cost->alu.rev;
9628 return false;
9631 else
9633 /* No rev instruction available. Look at arm_legacy_rev
9634 and thumb_legacy_rev for the form of RTL used then. */
9635 if (TARGET_THUMB)
9637 *cost = COSTS_N_INSNS (10);
9639 if (speed_p)
9641 *cost += 6 * extra_cost->alu.shift;
9642 *cost += 3 * extra_cost->alu.logical;
9645 else
9647 *cost = COSTS_N_INSNS (5);
9649 if (speed_p)
9651 *cost += 2 * extra_cost->alu.shift;
9652 *cost += extra_cost->alu.arith_shift;
9653 *cost += 2 * extra_cost->alu.logical;
9656 return true;
9658 return false;
9660 case MINUS:
9661 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9662 && (mode == SFmode || !TARGET_VFP_SINGLE))
9664 *cost = COSTS_N_INSNS (1);
9665 if (GET_CODE (XEXP (x, 0)) == MULT
9666 || GET_CODE (XEXP (x, 1)) == MULT)
9668 rtx mul_op0, mul_op1, sub_op;
9670 if (speed_p)
9671 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9673 if (GET_CODE (XEXP (x, 0)) == MULT)
9675 mul_op0 = XEXP (XEXP (x, 0), 0);
9676 mul_op1 = XEXP (XEXP (x, 0), 1);
9677 sub_op = XEXP (x, 1);
9679 else
9681 mul_op0 = XEXP (XEXP (x, 1), 0);
9682 mul_op1 = XEXP (XEXP (x, 1), 1);
9683 sub_op = XEXP (x, 0);
9686 /* The first operand of the multiply may be optionally
9687 negated. */
9688 if (GET_CODE (mul_op0) == NEG)
9689 mul_op0 = XEXP (mul_op0, 0);
9691 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9692 + rtx_cost (mul_op1, code, 0, speed_p)
9693 + rtx_cost (sub_op, code, 0, speed_p));
9695 return true;
9698 if (speed_p)
9699 *cost += extra_cost->fp[mode != SFmode].addsub;
9700 return false;
9703 if (mode == SImode)
9705 rtx shift_by_reg = NULL;
9706 rtx shift_op;
9707 rtx non_shift_op;
9709 *cost = COSTS_N_INSNS (1);
9711 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9712 if (shift_op == NULL)
9714 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9715 non_shift_op = XEXP (x, 0);
9717 else
9718 non_shift_op = XEXP (x, 1);
9720 if (shift_op != NULL)
9722 if (shift_by_reg != NULL)
9724 if (speed_p)
9725 *cost += extra_cost->alu.arith_shift_reg;
9726 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9728 else if (speed_p)
9729 *cost += extra_cost->alu.arith_shift;
9731 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9732 + rtx_cost (non_shift_op, code, 0, speed_p));
9733 return true;
9736 if (arm_arch_thumb2
9737 && GET_CODE (XEXP (x, 1)) == MULT)
9739 /* MLS. */
9740 if (speed_p)
9741 *cost += extra_cost->mult[0].add;
9742 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9743 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9745 return true;
9748 if (CONST_INT_P (XEXP (x, 0)))
9750 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 0)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9757 return true;
9759 else if (speed_p)
9760 *cost += extra_cost->alu.arith;
9762 return false;
9765 if (GET_MODE_CLASS (mode) == MODE_INT
9766 && GET_MODE_SIZE (mode) < 4)
9768 rtx shift_op, shift_reg;
9769 shift_reg = NULL;
9771 /* We check both sides of the MINUS for shifter operands since,
9772 unlike PLUS, it's not commutative. */
9774 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
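/* Illustrative RTL (not from the sources): the first invocation above handles
   e.g. (minus:HI (ashift:HI (reg) (const_int 2)) (reg)), while the second
   handles (minus:HI (reg) (ashift:HI (reg) (const_int 2))), since only left
   shifts are usable as shifter operands in the narrow modes.  */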
9777 /* Slightly disparage, as we might need to widen the result. */
9778 *cost = 1 + COSTS_N_INSNS (1);
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith;
9782 if (CONST_INT_P (XEXP (x, 0)))
9784 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9785 return true;
9788 return false;
9791 if (mode == DImode)
9793 *cost = COSTS_N_INSNS (2);
9795 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9797 rtx op1 = XEXP (x, 1);
9799 if (speed_p)
9800 *cost += 2 * extra_cost->alu.arith;
9802 if (GET_CODE (op1) == ZERO_EXTEND)
9803 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9804 else
9805 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9806 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9807 0, speed_p);
9808 return true;
9810 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9812 if (speed_p)
9813 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9814 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9815 0, speed_p)
9816 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9817 return true;
9819 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9820 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9822 if (speed_p)
9823 *cost += (extra_cost->alu.arith
9824 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9825 ? extra_cost->alu.arith
9826 : extra_cost->alu.arith_shift));
9827 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9828 + rtx_cost (XEXP (XEXP (x, 1), 0),
9829 GET_CODE (XEXP (x, 1)), 0, speed_p));
9830 return true;
9833 if (speed_p)
9834 *cost += 2 * extra_cost->alu.arith;
9835 return false;
9838 /* Vector mode? */
9840 *cost = LIBCALL_COST (2);
9841 return false;
9843 case PLUS:
9844 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9845 && (mode == SFmode || !TARGET_VFP_SINGLE))
9847 *cost = COSTS_N_INSNS (1);
9848 if (GET_CODE (XEXP (x, 0)) == MULT)
9850 rtx mul_op0, mul_op1, add_op;
9852 if (speed_p)
9853 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9855 mul_op0 = XEXP (XEXP (x, 0), 0);
9856 mul_op1 = XEXP (XEXP (x, 0), 1);
9857 add_op = XEXP (x, 1);
9859 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9860 + rtx_cost (mul_op1, code, 0, speed_p)
9861 + rtx_cost (add_op, code, 0, speed_p));
9863 return true;
9866 if (speed_p)
9867 *cost += extra_cost->fp[mode != SFmode].addsub;
9868 return false;
9870 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9872 *cost = LIBCALL_COST (2);
9873 return false;
9876 /* Narrow modes can be synthesized in SImode, but the range
9877 of useful sub-operations is limited. Check for shift operations
9878 on one of the operands. Only left shifts can be used in the
9879 narrow modes. */
9880 if (GET_MODE_CLASS (mode) == MODE_INT
9881 && GET_MODE_SIZE (mode) < 4)
9883 rtx shift_op, shift_reg;
9884 shift_reg = NULL;
9886 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9888 if (CONST_INT_P (XEXP (x, 1)))
9890 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9891 INTVAL (XEXP (x, 1)), NULL_RTX,
9892 NULL_RTX, 1, 0);
9893 *cost = COSTS_N_INSNS (insns);
9894 if (speed_p)
9895 *cost += insns * extra_cost->alu.arith;
9896 /* Slightly penalize a narrow operation as the result may
9897 need widening. */
9898 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9899 return true;
9902 /* Slightly penalize a narrow operation as the result may
9903 need widening. */
9904 *cost = 1 + COSTS_N_INSNS (1);
9905 if (speed_p)
9906 *cost += extra_cost->alu.arith;
9908 return false;
9911 if (mode == SImode)
9913 rtx shift_op, shift_reg;
9915 *cost = COSTS_N_INSNS (1);
9916 if (TARGET_INT_SIMD
9917 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9918 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9920 /* UXTA[BH] or SXTA[BH]. */
9921 if (speed_p)
9922 *cost += extra_cost->alu.extend_arith;
9923 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9924 speed_p)
9925 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9926 return true;
9929 shift_reg = NULL;
9930 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9931 if (shift_op != NULL)
9933 if (shift_reg)
9935 if (speed_p)
9936 *cost += extra_cost->alu.arith_shift_reg;
9937 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9939 else if (speed_p)
9940 *cost += extra_cost->alu.arith_shift;
9942 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9943 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9944 return true;
9946 if (GET_CODE (XEXP (x, 0)) == MULT)
9948 rtx mul_op = XEXP (x, 0);
9950 *cost = COSTS_N_INSNS (1);
9952 if (TARGET_DSP_MULTIPLY
9953 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9954 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9955 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9958 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9961 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9962 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9963 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9964 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9965 == 16))))))
9967 /* SMLA[BT][BT]. */
9968 if (speed_p)
9969 *cost += extra_cost->mult[0].extend_add;
9970 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9971 SIGN_EXTEND, 0, speed_p)
9972 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9973 SIGN_EXTEND, 0, speed_p)
9974 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9975 return true;
9978 if (speed_p)
9979 *cost += extra_cost->mult[0].add;
9980 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9981 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9982 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9983 return true;
9985 if (CONST_INT_P (XEXP (x, 1)))
9987 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9988 INTVAL (XEXP (x, 1)), NULL_RTX,
9989 NULL_RTX, 1, 0);
9990 *cost = COSTS_N_INSNS (insns);
9991 if (speed_p)
9992 *cost += insns * extra_cost->alu.arith;
9993 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9994 return true;
9996 else if (speed_p)
9997 *cost += extra_cost->alu.arith;
9999 return false;
10002 if (mode == DImode)
10004 if (arm_arch3m
10005 && GET_CODE (XEXP (x, 0)) == MULT
10006 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10007 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10008 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10009 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10011 *cost = COSTS_N_INSNS (1);
10012 if (speed_p)
10013 *cost += extra_cost->mult[1].extend_add;
10014 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10015 ZERO_EXTEND, 0, speed_p)
10016 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10017 ZERO_EXTEND, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10019 return true;
10022 *cost = COSTS_N_INSNS (2);
10024 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10025 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10027 if (speed_p)
10028 *cost += (extra_cost->alu.arith
10029 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10030 ? extra_cost->alu.arith
10031 : extra_cost->alu.arith_shift));
10033 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10034 speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10039 if (speed_p)
10040 *cost += 2 * extra_cost->alu.arith;
10041 return false;
10044 /* Vector mode? */
10045 *cost = LIBCALL_COST (2);
10046 return false;
10047 case IOR:
10048 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10050 *cost = COSTS_N_INSNS (1);
10051 if (speed_p)
10052 *cost += extra_cost->alu.rev;
10054 return true;
10056 /* Fall through. */
10057 case AND: case XOR:
10058 if (mode == SImode)
10060 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10061 rtx op0 = XEXP (x, 0);
10062 rtx shift_op, shift_reg;
10064 *cost = COSTS_N_INSNS (1);
10066 if (subcode == NOT
10067 && (code == AND
10068 || (code == IOR && TARGET_THUMB2)))
10069 op0 = XEXP (op0, 0);
10071 shift_reg = NULL;
10072 shift_op = shifter_op_p (op0, &shift_reg);
10073 if (shift_op != NULL)
10075 if (shift_reg)
10077 if (speed_p)
10078 *cost += extra_cost->alu.log_shift_reg;
10079 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10081 else if (speed_p)
10082 *cost += extra_cost->alu.log_shift;
10084 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10085 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10086 return true;
10089 if (CONST_INT_P (XEXP (x, 1)))
10091 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10092 INTVAL (XEXP (x, 1)), NULL_RTX,
10093 NULL_RTX, 1, 0);
10095 *cost = COSTS_N_INSNS (insns);
10096 if (speed_p)
10097 *cost += insns * extra_cost->alu.logical;
10098 *cost += rtx_cost (op0, code, 0, speed_p);
10099 return true;
10102 if (speed_p)
10103 *cost += extra_cost->alu.logical;
10104 *cost += (rtx_cost (op0, code, 0, speed_p)
10105 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10106 return true;
10109 if (mode == DImode)
10111 rtx op0 = XEXP (x, 0);
10112 enum rtx_code subcode = GET_CODE (op0);
10114 *cost = COSTS_N_INSNS (2);
10116 if (subcode == NOT
10117 && (code == AND
10118 || (code == IOR && TARGET_THUMB2)))
10119 op0 = XEXP (op0, 0);
10121 if (GET_CODE (op0) == ZERO_EXTEND)
10123 if (speed_p)
10124 *cost += 2 * extra_cost->alu.logical;
10126 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10127 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10128 return true;
10130 else if (GET_CODE (op0) == SIGN_EXTEND)
10132 if (speed_p)
10133 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10135 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10136 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10137 return true;
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10143 return true;
10145 /* Vector mode? */
10147 *cost = LIBCALL_COST (2);
10148 return false;
10150 case MULT:
10151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10152 && (mode == SFmode || !TARGET_VFP_SINGLE))
10154 rtx op0 = XEXP (x, 0);
10156 *cost = COSTS_N_INSNS (1);
10158 if (GET_CODE (op0) == NEG)
10159 op0 = XEXP (op0, 0);
10161 if (speed_p)
10162 *cost += extra_cost->fp[mode != SFmode].mult;
10164 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10165 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10166 return true;
10168 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10170 *cost = LIBCALL_COST (2);
10171 return false;
10174 if (mode == SImode)
10176 *cost = COSTS_N_INSNS (1);
10177 if (TARGET_DSP_MULTIPLY
10178 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10179 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10180 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10182 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10183 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10185 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10186 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10187 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10189 && (INTVAL (XEXP (XEXP (x, 1), 1))
10190 == 16))))))
10192 /* SMUL[TB][TB]. */
10193 if (speed_p)
10194 *cost += extra_cost->mult[0].extend;
10195 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10196 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10197 return true;
10199 if (speed_p)
10200 *cost += extra_cost->mult[0].simple;
10201 return false;
10204 if (mode == DImode)
10206 if (arm_arch3m
10207 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10208 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10209 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10210 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10212 *cost = COSTS_N_INSNS (1);
10213 if (speed_p)
10214 *cost += extra_cost->mult[1].extend;
10215 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10216 ZERO_EXTEND, 0, speed_p)
10217 + rtx_cost (XEXP (XEXP (x, 1), 0),
10218 ZERO_EXTEND, 0, speed_p));
10219 return true;
10222 *cost = LIBCALL_COST (2);
10223 return false;
10226 /* Vector mode? */
10227 *cost = LIBCALL_COST (2);
10228 return false;
10230 case NEG:
10231 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10232 && (mode == SFmode || !TARGET_VFP_SINGLE))
10234 *cost = COSTS_N_INSNS (1);
10235 if (speed_p)
10236 *cost += extra_cost->fp[mode != SFmode].neg;
10238 return false;
10240 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10242 *cost = LIBCALL_COST (1);
10243 return false;
10246 if (mode == SImode)
10248 if (GET_CODE (XEXP (x, 0)) == ABS)
10250 *cost = COSTS_N_INSNS (2);
10251 /* Assume the non-flag-changing variant. */
10252 if (speed_p)
10253 *cost += (extra_cost->alu.log_shift
10254 + extra_cost->alu.arith_shift);
10255 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10256 return true;
10259 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10260 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10262 *cost = COSTS_N_INSNS (2);
10263 /* No extra cost for MOV imm and MVN imm. */
10264 /* If the comparison op is using the flags, there's no further
10265 cost, otherwise we need to add the cost of the comparison. */
10266 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10267 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10268 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10270 *cost += (COSTS_N_INSNS (1)
10271 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10272 speed_p)
10273 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10274 speed_p));
10275 if (speed_p)
10276 *cost += extra_cost->alu.arith;
10278 return true;
10280 *cost = COSTS_N_INSNS (1);
10281 if (speed_p)
10282 *cost += extra_cost->alu.arith;
10283 return false;
10286 if (GET_MODE_CLASS (mode) == MODE_INT
10287 && GET_MODE_SIZE (mode) < 4)
10289 /* Slightly disparage, as we might need an extend operation. */
10290 *cost = 1 + COSTS_N_INSNS (1);
10291 if (speed_p)
10292 *cost += extra_cost->alu.arith;
10293 return false;
10296 if (mode == DImode)
10298 *cost = COSTS_N_INSNS (2);
10299 if (speed_p)
10300 *cost += 2 * extra_cost->alu.arith;
10301 return false;
10304 /* Vector mode? */
10305 *cost = LIBCALL_COST (1);
10306 return false;
10308 case NOT:
10309 if (mode == SImode)
10311 rtx shift_op;
10312 rtx shift_reg = NULL;
10314 *cost = COSTS_N_INSNS (1);
10315 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10317 if (shift_op)
10319 if (shift_reg != NULL)
10321 if (speed_p)
10322 *cost += extra_cost->alu.log_shift_reg;
10323 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10325 else if (speed_p)
10326 *cost += extra_cost->alu.log_shift;
10327 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10328 return true;
10331 if (speed_p)
10332 *cost += extra_cost->alu.logical;
10333 return false;
10335 if (mode == DImode)
10337 *cost = COSTS_N_INSNS (2);
10338 return false;
10341 /* Vector mode? */
10343 *cost += LIBCALL_COST (1);
10344 return false;
10346 case IF_THEN_ELSE:
10348 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10350 *cost = COSTS_N_INSNS (4);
10351 return true;
10353 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10354 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10356 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10357 /* Assume that if one arm of the if_then_else is a register,
10358 it will be tied with the result and the conditional insn
10359 will be eliminated. */
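/* Illustrative case: for (set (reg r0) (if_then_else (cond) (reg r1) (plus ...)))
   the (reg r1) arm is assumed to be free, so only the cost of the other arm
   (op2cost here) is added.  */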
10360 if (REG_P (XEXP (x, 1)))
10361 *cost += op2cost;
10362 else if (REG_P (XEXP (x, 2)))
10363 *cost += op1cost;
10364 else
10366 if (speed_p)
10368 if (extra_cost->alu.non_exec_costs_exec)
10369 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10370 else
10371 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10373 else
10374 *cost += op1cost + op2cost;
10377 return true;
10379 case COMPARE:
10380 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10381 *cost = 0;
10382 else
10384 machine_mode op0mode;
10385 /* We'll mostly assume that the cost of a compare is the cost of the
10386 LHS. However, there are some notable exceptions. */
10388 /* Floating point compares are never done as side-effects. */
10389 op0mode = GET_MODE (XEXP (x, 0));
10390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10391 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10393 *cost = COSTS_N_INSNS (1);
10394 if (speed_p)
10395 *cost += extra_cost->fp[op0mode != SFmode].compare;
10397 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10399 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10400 return true;
10403 return false;
10405 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10407 *cost = LIBCALL_COST (2);
10408 return false;
10411 /* DImode compares normally take two insns. */
10412 if (op0mode == DImode)
10414 *cost = COSTS_N_INSNS (2);
10415 if (speed_p)
10416 *cost += 2 * extra_cost->alu.arith;
10417 return false;
10420 if (op0mode == SImode)
10422 rtx shift_op;
10423 rtx shift_reg;
10425 if (XEXP (x, 1) == const0_rtx
10426 && !(REG_P (XEXP (x, 0))
10427 || (GET_CODE (XEXP (x, 0)) == SUBREG
10428 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10430 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10432 /* Multiply operations that set the flags are often
10433 significantly more expensive. */
10434 if (speed_p
10435 && GET_CODE (XEXP (x, 0)) == MULT
10436 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10437 *cost += extra_cost->mult[0].flag_setting;
10439 if (speed_p
10440 && GET_CODE (XEXP (x, 0)) == PLUS
10441 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10442 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10443 0), 1), mode))
10444 *cost += extra_cost->mult[0].flag_setting;
10445 return true;
10448 shift_reg = NULL;
10449 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10450 if (shift_op != NULL)
10452 *cost = COSTS_N_INSNS (1);
10453 if (shift_reg != NULL)
10455 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10456 if (speed_p)
10457 *cost += extra_cost->alu.arith_shift_reg;
10459 else if (speed_p)
10460 *cost += extra_cost->alu.arith_shift;
10461 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10462 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10463 return true;
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.arith;
10469 if (CONST_INT_P (XEXP (x, 1))
10470 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10472 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10473 return true;
10475 return false;
10478 /* Vector mode? */
10480 *cost = LIBCALL_COST (2);
10481 return false;
10483 return true;
10485 case EQ:
10486 case NE:
10487 case LT:
10488 case LE:
10489 case GT:
10490 case GE:
10491 case LTU:
10492 case LEU:
10493 case GEU:
10494 case GTU:
10495 case ORDERED:
10496 case UNORDERED:
10497 case UNEQ:
10498 case UNLE:
10499 case UNLT:
10500 case UNGE:
10501 case UNGT:
10502 case LTGT:
10503 if (outer_code == SET)
10505 /* Is it a store-flag operation? */
10506 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10507 && XEXP (x, 1) == const0_rtx)
10509 /* Thumb also needs an IT insn. */
10510 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10511 return true;
10513 if (XEXP (x, 1) == const0_rtx)
10515 switch (code)
10517 case LT:
10518 /* LSR Rd, Rn, #31. */
10519 *cost = COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10522 break;
10524 case EQ:
10525 /* RSBS T1, Rn, #0
10526 ADC Rd, Rn, T1. */
10528 case NE:
10529 /* SUBS T1, Rn, #1
10530 SBC Rd, Rn, T1. */
10531 *cost = COSTS_N_INSNS (2);
10532 break;
10534 case LE:
10535 /* RSBS T1, Rn, Rn, LSR #31
10536 ADC Rd, Rn, T1. */
10537 *cost = COSTS_N_INSNS (2);
10538 if (speed_p)
10539 *cost += extra_cost->alu.arith_shift;
10540 break;
10542 case GT:
10543 /* RSB Rd, Rn, Rn, ASR #1
10544 LSR Rd, Rd, #31. */
10545 *cost = COSTS_N_INSNS (2);
10546 if (speed_p)
10547 *cost += (extra_cost->alu.arith_shift
10548 + extra_cost->alu.shift);
10549 break;
10551 case GE:
10552 /* ASR Rd, Rn, #31
10553 ADD Rd, Rn, #1. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += extra_cost->alu.shift;
10557 break;
10559 default:
10560 /* Remaining cases are either meaningless or would take
10561 three insns anyway. */
10562 *cost = COSTS_N_INSNS (3);
10563 break;
10565 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10566 return true;
10568 else
10570 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10571 if (CONST_INT_P (XEXP (x, 1))
10572 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10574 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10575 return true;
10578 return false;
10581 /* Not directly inside a set. If it involves the condition code
10582 register it must be the condition for a branch, cond_exec or
10583 I_T_E operation. Since the comparison is performed elsewhere
10584 this is just the control part which has no additional
10585 cost. */
10586 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10587 && XEXP (x, 1) == const0_rtx)
10589 *cost = 0;
10590 return true;
10592 return false;
10594 case ABS:
10595 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && (mode == SFmode || !TARGET_VFP_SINGLE))
10598 *cost = COSTS_N_INSNS (1);
10599 if (speed_p)
10600 *cost += extra_cost->fp[mode != SFmode].neg;
10602 return false;
10604 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10606 *cost = LIBCALL_COST (1);
10607 return false;
10610 if (mode == SImode)
10612 *cost = COSTS_N_INSNS (1);
10613 if (speed_p)
10614 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10615 return false;
10617 /* Vector mode? */
10618 *cost = LIBCALL_COST (1);
10619 return false;
10621 case SIGN_EXTEND:
10622 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10623 && MEM_P (XEXP (x, 0)))
10625 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10627 if (mode == DImode)
10628 *cost += COSTS_N_INSNS (1);
10630 if (!speed_p)
10631 return true;
10633 if (GET_MODE (XEXP (x, 0)) == SImode)
10634 *cost += extra_cost->ldst.load;
10635 else
10636 *cost += extra_cost->ldst.load_sign_extend;
10638 if (mode == DImode)
10639 *cost += extra_cost->alu.shift;
10641 return true;
10644 /* Widening from less than 32 bits requires an extend operation. */
10645 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10647 /* We have SXTB/SXTH. */
10648 *cost = COSTS_N_INSNS (1);
10649 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10650 if (speed_p)
10651 *cost += extra_cost->alu.extend;
10653 else if (GET_MODE (XEXP (x, 0)) != SImode)
10655 /* Needs two shifts. */
10656 *cost = COSTS_N_INSNS (2);
10657 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10658 if (speed_p)
10659 *cost += 2 * extra_cost->alu.shift;
10662 /* Widening beyond 32 bits requires one more insn. */
10663 if (mode == DImode)
10665 *cost += COSTS_N_INSNS (1);
10666 if (speed_p)
10667 *cost += extra_cost->alu.shift;
10670 return true;
10672 case ZERO_EXTEND:
10673 if ((arm_arch4
10674 || GET_MODE (XEXP (x, 0)) == SImode
10675 || GET_MODE (XEXP (x, 0)) == QImode)
10676 && MEM_P (XEXP (x, 0)))
10678 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10680 if (mode == DImode)
10681 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10683 return true;
10686 /* Widening from less than 32 bits requires an extend operation. */
10687 if (GET_MODE (XEXP (x, 0)) == QImode)
10689 /* UXTB can be a shorter instruction in Thumb2, but it might
10690 be slower than the AND Rd, Rn, #255 alternative. When
10691 optimizing for speed it should never be slower to use
10692 AND, and we don't really model 16-bit vs 32-bit insns
10693 here. */
10694 *cost = COSTS_N_INSNS (1);
10695 if (speed_p)
10696 *cost += extra_cost->alu.logical;
10698 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10700 /* We have UXTB/UXTH. */
10701 *cost = COSTS_N_INSNS (1);
10702 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10703 if (speed_p)
10704 *cost += extra_cost->alu.extend;
10706 else if (GET_MODE (XEXP (x, 0)) != SImode)
10708 /* Needs two shifts. It's marginally preferable to use
10709 shifts rather than two BIC instructions as the second
10710 shift may merge with a subsequent insn as a shifter
10711 op. */
10712 *cost = COSTS_N_INSNS (2);
10713 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10714 if (speed_p)
10715 *cost += 2 * extra_cost->alu.shift;
10717 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10718 *cost = COSTS_N_INSNS (1);
10720 /* Widening beyond 32 bits requires one more insn. */
10721 if (mode == DImode)
10723 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10726 return true;
10728 case CONST_INT:
10729 *cost = 0;
10730 /* CONST_INT has no mode, so we cannot tell for sure how many
10731 insns are really going to be needed. The best we can do is
10732 look at the value passed. If it fits in SImode, then assume
10733 that's the mode it will be used for. Otherwise assume it
10734 will be used in DImode. */
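/* Illustration: 0x12345678 survives truncation to SImode, so it is costed as
   an SImode constant; a 64-bit value such as 0x100000000 does not, so it is
   costed below as two separate SImode constants (low and high words).  */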
10735 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10736 mode = SImode;
10737 else
10738 mode = DImode;
10740 /* Avoid blowing up in arm_gen_constant (). */
10741 if (!(outer_code == PLUS
10742 || outer_code == AND
10743 || outer_code == IOR
10744 || outer_code == XOR
10745 || outer_code == MINUS))
10746 outer_code = SET;
10748 const_int_cost:
10749 if (mode == SImode)
10751 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10752 INTVAL (x), NULL, NULL,
10753 0, 0));
10754 /* Extra costs? */
10756 else
10758 *cost += COSTS_N_INSNS (arm_gen_constant
10759 (outer_code, SImode, NULL,
10760 trunc_int_for_mode (INTVAL (x), SImode),
10761 NULL, NULL, 0, 0)
10762 + arm_gen_constant (outer_code, SImode, NULL,
10763 INTVAL (x) >> 32, NULL,
10764 NULL, 0, 0));
10765 /* Extra costs? */
10768 return true;
10770 case CONST:
10771 case LABEL_REF:
10772 case SYMBOL_REF:
10773 if (speed_p)
10775 if (arm_arch_thumb2 && !flag_pic)
10776 *cost = COSTS_N_INSNS (2);
10777 else
10778 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10780 else
10781 *cost = COSTS_N_INSNS (2);
10783 if (flag_pic)
10785 *cost += COSTS_N_INSNS (1);
10786 if (speed_p)
10787 *cost += extra_cost->alu.arith;
10790 return true;
10792 case CONST_FIXED:
10793 *cost = COSTS_N_INSNS (4);
10794 /* Fixme. */
10795 return true;
10797 case CONST_DOUBLE:
10798 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10799 && (mode == SFmode || !TARGET_VFP_SINGLE))
10801 if (vfp3_const_double_rtx (x))
10803 *cost = COSTS_N_INSNS (1);
10804 if (speed_p)
10805 *cost += extra_cost->fp[mode == DFmode].fpconst;
10806 return true;
10809 if (speed_p)
10811 *cost = COSTS_N_INSNS (1);
10812 if (mode == DFmode)
10813 *cost += extra_cost->ldst.loadd;
10814 else
10815 *cost += extra_cost->ldst.loadf;
10817 else
10818 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10820 return true;
10822 *cost = COSTS_N_INSNS (4);
10823 return true;
10825 case CONST_VECTOR:
10826 /* Fixme. */
10827 if (TARGET_NEON
10828 && TARGET_HARD_FLOAT
10829 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10830 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10831 *cost = COSTS_N_INSNS (1);
10832 else
10833 *cost = COSTS_N_INSNS (4);
10834 return true;
10836 case HIGH:
10837 case LO_SUM:
10838 *cost = COSTS_N_INSNS (1);
10839 /* When optimizing for size, we prefer constant pool entries to
10840 MOVW/MOVT pairs, so bump the cost of these slightly. */
10841 if (!speed_p)
10842 *cost += 1;
10843 return true;
10845 case CLZ:
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->alu.clz;
10849 return false;
10851 case SMIN:
10852 if (XEXP (x, 1) == const0_rtx)
10854 *cost = COSTS_N_INSNS (1);
10855 if (speed_p)
10856 *cost += extra_cost->alu.log_shift;
10857 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10858 return true;
10860 /* Fall through. */
10861 case SMAX:
10862 case UMIN:
10863 case UMAX:
10864 *cost = COSTS_N_INSNS (2);
10865 return false;
10867 case TRUNCATE:
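/* The pattern tested below is the high 32 bits of a widening 32x32->64-bit
   multiply.  It can be computed with a single widening multiply (e.g.
   SMULL/UMULL, with only the high result word used), hence the single
   widening-multiply cost.  */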
10868 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10869 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10870 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10871 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10872 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10873 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10874 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10875 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10876 == ZERO_EXTEND))))
10878 *cost = COSTS_N_INSNS (1);
10879 if (speed_p)
10880 *cost += extra_cost->mult[1].extend;
10881 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10882 speed_p)
10883 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10884 0, speed_p));
10885 return true;
10887 *cost = LIBCALL_COST (1);
10888 return false;
10890 case UNSPEC:
10891 return arm_unspec_cost (x, outer_code, speed_p, cost);
10893 case PC:
10894 /* Reading the PC is like reading any other register. Writing it
10895 is more expensive, but we take that into account elsewhere. */
10896 *cost = 0;
10897 return true;
10899 case ZERO_EXTRACT:
10900 /* TODO: Simple zero_extract of bottom bits using AND. */
10901 /* Fall through. */
10902 case SIGN_EXTRACT:
10903 if (arm_arch6
10904 && mode == SImode
10905 && CONST_INT_P (XEXP (x, 1))
10906 && CONST_INT_P (XEXP (x, 2)))
10908 *cost = COSTS_N_INSNS (1);
10909 if (speed_p)
10910 *cost += extra_cost->alu.bfx;
10911 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10912 return true;
10914 /* Without UBFX/SBFX, need to resort to shift operations. */
10915 *cost = COSTS_N_INSNS (2);
10916 if (speed_p)
10917 *cost += 2 * extra_cost->alu.shift;
10918 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10919 return true;
10921 case FLOAT_EXTEND:
10922 if (TARGET_HARD_FLOAT)
10924 *cost = COSTS_N_INSNS (1);
10925 if (speed_p)
10926 *cost += extra_cost->fp[mode == DFmode].widen;
10927 if (!TARGET_FPU_ARMV8
10928 && GET_MODE (XEXP (x, 0)) == HFmode)
10930 /* Pre v8, widening HF->DF is a two-step process, first
10931 widening to SFmode. */
10932 *cost += COSTS_N_INSNS (1);
10933 if (speed_p)
10934 *cost += extra_cost->fp[0].widen;
10936 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10937 return true;
10940 *cost = LIBCALL_COST (1);
10941 return false;
10943 case FLOAT_TRUNCATE:
10944 if (TARGET_HARD_FLOAT)
10946 *cost = COSTS_N_INSNS (1);
10947 if (speed_p)
10948 *cost += extra_cost->fp[mode == DFmode].narrow;
10949 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10950 return true;
10951 /* Vector modes? */
10953 *cost = LIBCALL_COST (1);
10954 return false;
10956 case FMA:
10957 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10959 rtx op0 = XEXP (x, 0);
10960 rtx op1 = XEXP (x, 1);
10961 rtx op2 = XEXP (x, 2);
10963 *cost = COSTS_N_INSNS (1);
10965 /* vfms or vfnma. */
10966 if (GET_CODE (op0) == NEG)
10967 op0 = XEXP (op0, 0);
10969 /* vfnms or vfnma. */
10970 if (GET_CODE (op2) == NEG)
10971 op2 = XEXP (op2, 0);
10973 *cost += rtx_cost (op0, FMA, 0, speed_p);
10974 *cost += rtx_cost (op1, FMA, 1, speed_p);
10975 *cost += rtx_cost (op2, FMA, 2, speed_p);
10977 if (speed_p)
10978 *cost += extra_cost->fp[mode == DFmode].fma;
10980 return true;
10983 *cost = LIBCALL_COST (3);
10984 return false;
10986 case FIX:
10987 case UNSIGNED_FIX:
10988 if (TARGET_HARD_FLOAT)
10990 if (GET_MODE_CLASS (mode) == MODE_INT)
10992 *cost = COSTS_N_INSNS (1);
10993 if (speed_p)
10994 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10995 /* Strip off the 'cost' of rounding towards zero. */
10996 if (GET_CODE (XEXP (x, 0)) == FIX)
10997 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10998 else
10999 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11000 /* ??? Increase the cost to deal with transferring from
11001 FP -> CORE registers? */
11002 return true;
11004 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11005 && TARGET_FPU_ARMV8)
11007 *cost = COSTS_N_INSNS (1);
11008 if (speed_p)
11009 *cost += extra_cost->fp[mode == DFmode].roundint;
11010 return false;
11012 /* Vector costs? */
11014 *cost = LIBCALL_COST (1);
11015 return false;
11017 case FLOAT:
11018 case UNSIGNED_FLOAT:
11019 if (TARGET_HARD_FLOAT)
11021 /* ??? Increase the cost to deal with transferring from CORE
11022 -> FP registers? */
11023 *cost = COSTS_N_INSNS (1);
11024 if (speed_p)
11025 *cost += extra_cost->fp[mode == DFmode].fromint;
11026 return false;
11028 *cost = LIBCALL_COST (1);
11029 return false;
11031 case CALL:
11032 *cost = COSTS_N_INSNS (1);
11033 return true;
11035 case ASM_OPERANDS:
11037 /* Just a guess. Estimate the number of instructions in the asm
11038 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11039 though (see PR60663). */
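/* For example (illustrative): an asm whose template expands to two
   instructions and which has three inputs is costed as COSTS_N_INSNS (5).  */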
11040 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11041 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11043 *cost = COSTS_N_INSNS (asm_length + num_operands);
11044 return true;
11046 default:
11047 if (mode != VOIDmode)
11048 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11049 else
11050 *cost = COSTS_N_INSNS (4); /* Who knows? */
11051 return false;
11055 #undef HANDLE_NARROW_SHIFT_ARITH
11057 /* Implement TARGET_RTX_COSTS. Dispatch to the table-driven or legacy
11057 per-core cost functions, for both speed and size. */
11058 static bool
11059 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11060 int *total, bool speed)
11062 bool result;
11064 if (TARGET_OLD_RTX_COSTS
11065 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11067 /* Old way. (Deprecated.) */
11068 if (!speed)
11069 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11070 (enum rtx_code) outer_code, total);
11071 else
11072 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code, total,
11074 speed);
11076 else
11078 /* New way. */
11079 if (current_tune->insn_extra_cost)
11080 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11081 (enum rtx_code) outer_code,
11082 current_tune->insn_extra_cost,
11083 total, speed);
11084 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11085 && current_tune->insn_extra_cost == NULL */
11086 else
11087 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11088 (enum rtx_code) outer_code,
11089 &generic_extra_costs, total, speed);
11092 if (dump_file && (dump_flags & TDF_DETAILS))
11094 print_rtl_single (dump_file, x);
11095 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11096 *total, result ? "final" : "partial");
11098 return result;
11101 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11102 supported on any "slowmul" cores, so it can be ignored. */
11104 static bool
11105 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11106 int *total, bool speed)
11108 machine_mode mode = GET_MODE (x);
11110 if (TARGET_THUMB)
11112 *total = thumb1_rtx_costs (x, code, outer_code);
11113 return true;
11116 switch (code)
11118 case MULT:
11119 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11120 || mode == DImode)
11122 *total = COSTS_N_INSNS (20);
11123 return false;
11126 if (CONST_INT_P (XEXP (x, 1)))
11128 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11129 & (unsigned HOST_WIDE_INT) 0xffffffff);
11130 int cost, const_ok = const_ok_for_arm (i);
11131 int j, booth_unit_size;
11133 /* Tune as appropriate. */
11134 cost = const_ok ? 4 : 8;
11135 booth_unit_size = 2;
11136 for (j = 0; i && j < 32; j += booth_unit_size)
11138 i >>= booth_unit_size;
11139 cost++;
11142 *total = COSTS_N_INSNS (cost);
11143 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11144 return true;
11147 *total = COSTS_N_INSNS (20);
11148 return false;
11150 default:
11151 return arm_rtx_costs_1 (x, outer_code, total, speed);
11156 /* RTX cost for cores with a fast multiply unit (M variants). */
11158 static bool
11159 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11160 int *total, bool speed)
11162 machine_mode mode = GET_MODE (x);
11164 if (TARGET_THUMB1)
11166 *total = thumb1_rtx_costs (x, code, outer_code);
11167 return true;
11170 /* ??? should thumb2 use different costs? */
11171 switch (code)
11173 case MULT:
11174 /* There is no point basing this on the tuning, since it is always the
11175 fast variant if it exists at all. */
11176 if (mode == DImode
11177 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11178 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11179 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11181 *total = COSTS_N_INSNS (2);
11182 return false;
11186 if (mode == DImode)
11188 *total = COSTS_N_INSNS (5);
11189 return false;
11192 if (CONST_INT_P (XEXP (x, 1)))
11194 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11195 & (unsigned HOST_WIDE_INT) 0xffffffff);
11196 int cost, const_ok = const_ok_for_arm (i);
11197 int j, booth_unit_size;
11199 /* Tune as appropriate. */
11200 cost = const_ok ? 4 : 8;
11201 booth_unit_size = 8;
11202 for (j = 0; i && j < 32; j += booth_unit_size)
11204 i >>= booth_unit_size;
11205 cost++;
11208 *total = COSTS_N_INSNS (cost);
11209 return false;
11212 if (mode == SImode)
11214 *total = COSTS_N_INSNS (4);
11215 return false;
11218 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11220 if (TARGET_HARD_FLOAT
11221 && (mode == SFmode
11222 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11224 *total = COSTS_N_INSNS (1);
11225 return false;
11229 /* Requires a lib call */
11230 *total = COSTS_N_INSNS (20);
11231 return false;
11233 default:
11234 return arm_rtx_costs_1 (x, outer_code, total, speed);
11239 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11240 so it can be ignored. */
11242 static bool
11243 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11244 int *total, bool speed)
11246 machine_mode mode = GET_MODE (x);
11248 if (TARGET_THUMB)
11250 *total = thumb1_rtx_costs (x, code, outer_code);
11251 return true;
11254 switch (code)
11256 case COMPARE:
11257 if (GET_CODE (XEXP (x, 0)) != MULT)
11258 return arm_rtx_costs_1 (x, outer_code, total, speed);
11260 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11261 will stall until the multiplication is complete. */
11262 *total = COSTS_N_INSNS (3);
11263 return false;
11265 case MULT:
11266 /* There is no point basing this on the tuning, since it is always the
11267 fast variant if it exists at all. */
11268 if (mode == DImode
11269 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11270 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11271 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11273 *total = COSTS_N_INSNS (2);
11274 return false;
11278 if (mode == DImode)
11280 *total = COSTS_N_INSNS (5);
11281 return false;
11284 if (CONST_INT_P (XEXP (x, 1)))
11286 /* If operand 1 is a constant we can more accurately
11287 calculate the cost of the multiply. The multiplier can
11288 retire 15 bits on the first cycle and a further 12 on the
11289 second. We do, of course, have to load the constant into
11290 a register first. */
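/* Illustrative values for the cost computed below: multiplying by 0x4000
   needs only the low 15 bits and incurs just the 1-cycle overhead, 0x12345
   sets bits above bit 14 and costs 2, and 0x12345678 also sets bits above
   bit 26 and costs 3.  */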
11291 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11292 /* There's a general overhead of one cycle. */
11293 int cost = 1;
11294 unsigned HOST_WIDE_INT masked_const;
11296 if (i & 0x80000000)
11297 i = ~i;
11299 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11301 masked_const = i & 0xffff8000;
11302 if (masked_const != 0)
11304 cost++;
11305 masked_const = i & 0xf8000000;
11306 if (masked_const != 0)
11307 cost++;
11309 *total = COSTS_N_INSNS (cost);
11310 return false;
11313 if (mode == SImode)
11315 *total = COSTS_N_INSNS (3);
11316 return false;
11319 /* Requires a lib call */
11320 *total = COSTS_N_INSNS (20);
11321 return false;
11323 default:
11324 return arm_rtx_costs_1 (x, outer_code, total, speed);
11329 /* RTX costs for 9e (and later) cores. */
11331 static bool
11332 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11333 int *total, bool speed)
11335 machine_mode mode = GET_MODE (x);
11337 if (TARGET_THUMB1)
11339 switch (code)
11341 case MULT:
11342 /* Small multiply: 32 cycles for an integer multiply inst. */
11343 if (arm_arch6m && arm_m_profile_small_mul)
11344 *total = COSTS_N_INSNS (32);
11345 else
11346 *total = COSTS_N_INSNS (3);
11347 return true;
11349 default:
11350 *total = thumb1_rtx_costs (x, code, outer_code);
11351 return true;
11355 switch (code)
11357 case MULT:
11358 /* There is no point basing this on the tuning, since it is always the
11359 fast variant if it exists at all. */
11360 if (mode == DImode
11361 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11362 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11363 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11365 *total = COSTS_N_INSNS (2);
11366 return false;
11370 if (mode == DImode)
11372 *total = COSTS_N_INSNS (5);
11373 return false;
11376 if (mode == SImode)
11378 *total = COSTS_N_INSNS (2);
11379 return false;
11382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11384 if (TARGET_HARD_FLOAT
11385 && (mode == SFmode
11386 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11388 *total = COSTS_N_INSNS (1);
11389 return false;
11393 *total = COSTS_N_INSNS (20);
11394 return false;
11396 default:
11397 return arm_rtx_costs_1 (x, outer_code, total, speed);
11400 /* All address computations that can be done are free, but rtx cost returns
11401 the same for practically all of them. So we weight the different types
11402 of address here in the order (most pref first):
11403 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
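/* Worked examples of the weighting below (illustrative): (post_inc (reg))
   scores 0, (plus (reg) (const_int 8)) scores 2, (plus (reg) (mult (reg)
   (const_int 4))) scores 3, a plain (reg) scores 6 and a (symbol_ref)
   scores 10.  */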
11404 static inline int
11405 arm_arm_address_cost (rtx x)
11407 enum rtx_code c = GET_CODE (x);
11409 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11410 return 0;
11411 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11412 return 10;
11414 if (c == PLUS)
11416 if (CONST_INT_P (XEXP (x, 1)))
11417 return 2;
11419 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11420 return 3;
11422 return 4;
11425 return 6;
11428 static inline int
11429 arm_thumb_address_cost (rtx x)
11431 enum rtx_code c = GET_CODE (x);
11433 if (c == REG)
11434 return 1;
11435 if (c == PLUS
11436 && REG_P (XEXP (x, 0))
11437 && CONST_INT_P (XEXP (x, 1)))
11438 return 1;
11440 return 2;
11443 static int
11444 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11445 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11447 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11450 /* Adjust cost hook for XScale. */
11451 static bool
11452 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11454 /* Some true dependencies can have a higher cost depending
11455 on precisely how certain input operands are used. */
11456 if (REG_NOTE_KIND(link) == 0
11457 && recog_memoized (insn) >= 0
11458 && recog_memoized (dep) >= 0)
11460 int shift_opnum = get_attr_shift (insn);
11461 enum attr_type attr_type = get_attr_type (dep);
11463 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11464 operand for INSN. If we have a shifted input operand and the
11465 instruction we depend on is another ALU instruction, then we may
11466 have to account for an additional stall. */
11467 if (shift_opnum != 0
11468 && (attr_type == TYPE_ALU_SHIFT_IMM
11469 || attr_type == TYPE_ALUS_SHIFT_IMM
11470 || attr_type == TYPE_LOGIC_SHIFT_IMM
11471 || attr_type == TYPE_LOGICS_SHIFT_IMM
11472 || attr_type == TYPE_ALU_SHIFT_REG
11473 || attr_type == TYPE_ALUS_SHIFT_REG
11474 || attr_type == TYPE_LOGIC_SHIFT_REG
11475 || attr_type == TYPE_LOGICS_SHIFT_REG
11476 || attr_type == TYPE_MOV_SHIFT
11477 || attr_type == TYPE_MVN_SHIFT
11478 || attr_type == TYPE_MOV_SHIFT_REG
11479 || attr_type == TYPE_MVN_SHIFT_REG))
11481 rtx shifted_operand;
11482 int opno;
11484 /* Get the shifted operand. */
11485 extract_insn (insn);
11486 shifted_operand = recog_data.operand[shift_opnum];
11488 /* Iterate over all the operands in DEP. If we write an operand
11489 that overlaps with SHIFTED_OPERAND, then we have to increase the
11490 cost of this dependency. */
11491 extract_insn (dep);
11492 preprocess_constraints (dep);
11493 for (opno = 0; opno < recog_data.n_operands; opno++)
11495 /* We can ignore strict inputs. */
11496 if (recog_data.operand_type[opno] == OP_IN)
11497 continue;
11499 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11500 shifted_operand))
11502 *cost = 2;
11503 return false;
11508 return true;
11511 /* Adjust cost hook for Cortex A9. */
11512 static bool
11513 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11515 switch (REG_NOTE_KIND (link))
11517 case REG_DEP_ANTI:
11518 *cost = 0;
11519 return false;
11521 case REG_DEP_TRUE:
11522 case REG_DEP_OUTPUT:
11523 if (recog_memoized (insn) >= 0
11524 && recog_memoized (dep) >= 0)
11526 if (GET_CODE (PATTERN (insn)) == SET)
11528 if (GET_MODE_CLASS
11529 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11530 || GET_MODE_CLASS
11531 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11533 enum attr_type attr_type_insn = get_attr_type (insn);
11534 enum attr_type attr_type_dep = get_attr_type (dep);
11536 /* By default all dependencies of the form
11537 s0 = s0 <op> s1
11538 s0 = s0 <op> s2
11539 have an extra latency of 1 cycle because
11540 of the input and output dependency in this
11541 case. However, this gets modeled as a true
11542 dependency and hence all these checks. */
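/* Illustrative reading of the checks below: for two ordinary FP ops that
   both write s0, an output dependency is charged the default latency plus
   one cycle; for an FMACS/FMACD pair the dependent insn can start early, so
   an output dependency is charged the default latency minus three.  */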
11543 if (REG_P (SET_DEST (PATTERN (insn)))
11544 && REG_P (SET_DEST (PATTERN (dep)))
11545 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11546 SET_DEST (PATTERN (dep))))
11548 /* FMACS is a special case where the dependent
11549 instruction can be issued 3 cycles before
11550 the normal latency in case of an output
11551 dependency. */
11552 if ((attr_type_insn == TYPE_FMACS
11553 || attr_type_insn == TYPE_FMACD)
11554 && (attr_type_dep == TYPE_FMACS
11555 || attr_type_dep == TYPE_FMACD))
11557 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11558 *cost = insn_default_latency (dep) - 3;
11559 else
11560 *cost = insn_default_latency (dep);
11561 return false;
11563 else
11565 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11566 *cost = insn_default_latency (dep) + 1;
11567 else
11568 *cost = insn_default_latency (dep);
11570 return false;
11575 break;
11577 default:
11578 gcc_unreachable ();
11581 return true;
11584 /* Adjust cost hook for FA726TE. */
11585 static bool
11586 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11588 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11589 has a penalty of 3. */
11590 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11591 && recog_memoized (insn) >= 0
11592 && recog_memoized (dep) >= 0
11593 && get_attr_conds (dep) == CONDS_SET)
11595 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11596 if (get_attr_conds (insn) == CONDS_USE
11597 && get_attr_type (insn) != TYPE_BRANCH)
11599 *cost = 3;
11600 return false;
11603 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11604 || get_attr_conds (insn) == CONDS_USE)
11606 *cost = 0;
11607 return false;
11611 return true;
11614 /* Implement TARGET_REGISTER_MOVE_COST.
11616 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11617 such a move is typically more expensive than a single memory access. We set
11618 the cost to less than two memory accesses so that floating
11619 point to integer conversion does not go through memory. */
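/* With the numbers below (illustrative comparison): a VFP<->core transfer is
   costed at 15, which is less than twice the TARGET_32BIT memory move cost
   of 10 returned by arm_memory_move_cost, so the register path is preferred
   over spilling through memory.  */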
11622 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11623 reg_class_t from, reg_class_t to)
11625 if (TARGET_32BIT)
11627 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11628 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11629 return 15;
11630 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11631 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11632 return 4;
11633 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11634 return 20;
11635 else
11636 return 2;
11638 else
11640 if (from == HI_REGS || to == HI_REGS)
11641 return 4;
11642 else
11643 return 2;
11647 /* Implement TARGET_MEMORY_MOVE_COST. */
11650 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11651 bool in ATTRIBUTE_UNUSED)
11653 if (TARGET_32BIT)
11654 return 10;
11655 else
11657 if (GET_MODE_SIZE (mode) < 4)
11658 return 8;
11659 else
11660 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
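/* Illustrative values for the !TARGET_32BIT formula above: QImode and HImode
   moves cost a flat 8, while an SImode move costs 2 * 4 * 1 = 8 via LO_REGS
   and 2 * 4 * 2 = 16 via any other class.  */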
11664 /* Vectorizer cost model implementation. */
11666 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11667 static int
11668 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11669 tree vectype,
11670 int misalign ATTRIBUTE_UNUSED)
11672 unsigned elements;
11674 switch (type_of_cost)
11676 case scalar_stmt:
11677 return current_tune->vec_costs->scalar_stmt_cost;
11679 case scalar_load:
11680 return current_tune->vec_costs->scalar_load_cost;
11682 case scalar_store:
11683 return current_tune->vec_costs->scalar_store_cost;
11685 case vector_stmt:
11686 return current_tune->vec_costs->vec_stmt_cost;
11688 case vector_load:
11689 return current_tune->vec_costs->vec_align_load_cost;
11691 case vector_store:
11692 return current_tune->vec_costs->vec_store_cost;
11694 case vec_to_scalar:
11695 return current_tune->vec_costs->vec_to_scalar_cost;
11697 case scalar_to_vec:
11698 return current_tune->vec_costs->scalar_to_vec_cost;
11700 case unaligned_load:
11701 return current_tune->vec_costs->vec_unalign_load_cost;
11703 case unaligned_store:
11704 return current_tune->vec_costs->vec_unalign_store_cost;
11706 case cond_branch_taken:
11707 return current_tune->vec_costs->cond_taken_branch_cost;
11709 case cond_branch_not_taken:
11710 return current_tune->vec_costs->cond_not_taken_branch_cost;
11712 case vec_perm:
11713 case vec_promote_demote:
11714 return current_tune->vec_costs->vec_stmt_cost;
11716 case vec_construct:
11717 elements = TYPE_VECTOR_SUBPARTS (vectype);
11718 return elements / 2 + 1;
11720 default:
11721 gcc_unreachable ();
11725 /* Implement targetm.vectorize.add_stmt_cost. */
11727 static unsigned
11728 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11729 struct _stmt_vec_info *stmt_info, int misalign,
11730 enum vect_cost_model_location where)
11732 unsigned *cost = (unsigned *) data;
11733 unsigned retval = 0;
11735 if (flag_vect_cost_model)
11737 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11738 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11740 /* Statements in an inner loop relative to the loop being
11741 vectorized are weighted more heavily. The value here is
11742 arbitrary and could potentially be improved with analysis. */
11743 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11744 count *= 50; /* FIXME. */
11746 retval = (unsigned) (count * stmt_cost);
11747 cost[where] += retval;
11750 return retval;
11753 /* Return true if and only if this insn can dual-issue only as older. */
11754 static bool
11755 cortexa7_older_only (rtx_insn *insn)
11757 if (recog_memoized (insn) < 0)
11758 return false;
11760 switch (get_attr_type (insn))
11762 case TYPE_ALU_DSP_REG:
11763 case TYPE_ALU_SREG:
11764 case TYPE_ALUS_SREG:
11765 case TYPE_LOGIC_REG:
11766 case TYPE_LOGICS_REG:
11767 case TYPE_ADC_REG:
11768 case TYPE_ADCS_REG:
11769 case TYPE_ADR:
11770 case TYPE_BFM:
11771 case TYPE_REV:
11772 case TYPE_MVN_REG:
11773 case TYPE_SHIFT_IMM:
11774 case TYPE_SHIFT_REG:
11775 case TYPE_LOAD_BYTE:
11776 case TYPE_LOAD1:
11777 case TYPE_STORE1:
11778 case TYPE_FFARITHS:
11779 case TYPE_FADDS:
11780 case TYPE_FFARITHD:
11781 case TYPE_FADDD:
11782 case TYPE_FMOV:
11783 case TYPE_F_CVT:
11784 case TYPE_FCMPS:
11785 case TYPE_FCMPD:
11786 case TYPE_FCONSTS:
11787 case TYPE_FCONSTD:
11788 case TYPE_FMULS:
11789 case TYPE_FMACS:
11790 case TYPE_FMULD:
11791 case TYPE_FMACD:
11792 case TYPE_FDIVS:
11793 case TYPE_FDIVD:
11794 case TYPE_F_MRC:
11795 case TYPE_F_MRRC:
11796 case TYPE_F_FLAG:
11797 case TYPE_F_LOADS:
11798 case TYPE_F_STORES:
11799 return true;
11800 default:
11801 return false;
11805 /* Return true if and only if this insn can dual-issue as younger. */
11806 static bool
11807 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11809 if (recog_memoized (insn) < 0)
11811 if (verbose > 5)
11812 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11813 return false;
11816 switch (get_attr_type (insn))
11818 case TYPE_ALU_IMM:
11819 case TYPE_ALUS_IMM:
11820 case TYPE_LOGIC_IMM:
11821 case TYPE_LOGICS_IMM:
11822 case TYPE_EXTEND:
11823 case TYPE_MVN_IMM:
11824 case TYPE_MOV_IMM:
11825 case TYPE_MOV_REG:
11826 case TYPE_MOV_SHIFT:
11827 case TYPE_MOV_SHIFT_REG:
11828 case TYPE_BRANCH:
11829 case TYPE_CALL:
11830 return true;
11831 default:
11832 return false;
11837 /* Look for an instruction that can dual issue only as an older
11838 instruction, and move it in front of any instructions that can
11839 dual-issue as younger, while preserving the relative order of all
11840 other instructions in the ready list. This is a heuristic to help
11841 dual-issue in later cycles, by postponing issue of more flexible
11842 instructions. This heuristic may affect dual issue opportunities
11843 in the current cycle. */
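/* For example (schematic): if the ready list would issue a mov-immediate
   (younger per cortexa7_younger) ahead of a register-register add
   (older-only per cortexa7_older_only), the add is moved in front of the
   mov, leaving all other insns in their original relative order.  */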
11844 static void
11845 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11846 int *n_readyp, int clock)
11848 int i;
11849 int first_older_only = -1, first_younger = -1;
11851 if (verbose > 5)
11852 fprintf (file,
11853 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11854 clock,
11855 *n_readyp);
11857 /* Traverse the ready list from the head (the instruction to issue
11858 first), looking for the first instruction that can issue as
11859 younger and the first instruction that can dual-issue only as
11860 older. */
11861 for (i = *n_readyp - 1; i >= 0; i--)
11863 rtx_insn *insn = ready[i];
11864 if (cortexa7_older_only (insn))
11866 first_older_only = i;
11867 if (verbose > 5)
11868 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11869 break;
11871 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11872 first_younger = i;
11875 /* Nothing to reorder because either no younger insn found or insn
11876 that can dual-issue only as older appears before any insn that
11877 can dual-issue as younger. */
11878 if (first_younger == -1)
11880 if (verbose > 5)
11881 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11882 return;
11885 /* Nothing to reorder because no older-only insn in the ready list. */
11886 if (first_older_only == -1)
11888 if (verbose > 5)
11889 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11890 return;
11893 /* Move first_older_only insn before first_younger. */
11894 if (verbose > 5)
11895 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11896 INSN_UID(ready [first_older_only]),
11897 INSN_UID(ready [first_younger]));
11898 rtx_insn *first_older_only_insn = ready [first_older_only];
11899 for (i = first_older_only; i < first_younger; i++)
11901 ready[i] = ready[i+1];
11904 ready[i] = first_older_only_insn;
11905 return;
11908 /* Implement TARGET_SCHED_REORDER. */
11909 static int
11910 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11911 int clock)
11913 switch (arm_tune)
11915 case cortexa7:
11916 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11917 break;
11918 default:
11919 /* Do nothing for other cores. */
11920 break;
11923 return arm_issue_rate ();
11926 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11927 It corrects the value of COST based on the relationship between
11928 INSN and DEP through the dependence LINK. It returns the new
11929 value. There is a per-core adjust_cost hook to adjust scheduler costs
11930 and the per-core hook can choose to completely override the generic
11931 adjust_cost function. Only put bits of code into arm_adjust_cost that
11932 are common across all cores. */
11933 static int
11934 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11936 rtx i_pat, d_pat;
11938 /* When generating Thumb-1 code, we want to place flag-setting operations
11939 close to a conditional branch which depends on them, so that we can
11940 omit the comparison. */
11941 if (TARGET_THUMB1
11942 && REG_NOTE_KIND (link) == 0
11943 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11944 && recog_memoized (dep) >= 0
11945 && get_attr_conds (dep) == CONDS_SET)
11946 return 0;
11948 if (current_tune->sched_adjust_cost != NULL)
11950 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11951 return cost;
11954 /* XXX Is this strictly true? */
11955 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11956 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11957 return 0;
11959 /* Call insns don't incur a stall, even if they follow a load. */
11960 if (REG_NOTE_KIND (link) == 0
11961 && CALL_P (insn))
11962 return 1;
11964 if ((i_pat = single_set (insn)) != NULL
11965 && MEM_P (SET_SRC (i_pat))
11966 && (d_pat = single_set (dep)) != NULL
11967 && MEM_P (SET_DEST (d_pat)))
11969 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11970 /* This is a load after a store, there is no conflict if the load reads
11971 from a cached area. Assume that loads from the stack, and from the
11972 constant pool are cached, and that others will miss. This is a
11973 hack. */
11975 if ((GET_CODE (src_mem) == SYMBOL_REF
11976 && CONSTANT_POOL_ADDRESS_P (src_mem))
11977 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11978 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11979 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11980 return 1;
11983 return cost;
11987 arm_max_conditional_execute (void)
11989 return max_insns_skipped;
11992 static int
11993 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11995 if (TARGET_32BIT)
11996 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11997 else
11998 return (optimize > 0) ? 2 : 0;
12001 static int
12002 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12004 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12007 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12008 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12009 sequences of non-executed instructions in IT blocks probably take the same
12010 amount of time as executed instructions (and the IT instruction itself takes
12011 space in icache). This function was experimentally determined to give good
12012 results on a popular embedded benchmark. */
12014 static int
12015 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12017 return (TARGET_32BIT && speed_p) ? 1
12018 : arm_default_branch_cost (speed_p, predictable_p);
12021 static bool fp_consts_inited = false;
12023 static REAL_VALUE_TYPE value_fp0;
12025 static void
12026 init_fp_table (void)
12028 REAL_VALUE_TYPE r;
12030 r = REAL_VALUE_ATOF ("0", DFmode);
12031 value_fp0 = r;
12032 fp_consts_inited = true;
12035 /* Return TRUE if rtx X is a valid immediate FP constant. */
12037 arm_const_double_rtx (rtx x)
12039 REAL_VALUE_TYPE r;
12041 if (!fp_consts_inited)
12042 init_fp_table ();
12044 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12045 if (REAL_VALUE_MINUS_ZERO (r))
12046 return 0;
12048 if (REAL_VALUES_EQUAL (r, value_fp0))
12049 return 1;
12051 return 0;
12054 /* VFPv3 has a fairly wide range of representable immediates, formed from
12055 "quarter-precision" floating-point values. These can be evaluated using this
12056 formula (with ^ for exponentiation):
12058 -1^s * n * 2^-r
12060 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12061 16 <= n <= 31 and 0 <= r <= 7.
12063 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12065 - A (most-significant) is the sign bit.
12066 - BCD are the exponent (encoded as r XOR 3).
12067 - EFGH are the mantissa (encoded as n - 16).
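/* Worked example of the encoding above: 1.0 = +16 * 2^-4 (s = 0, n = 16,
   r = 4), giving A = 0, BCD = 4 ^ 3 = 7, EFGH = 16 - 16 = 0, i.e. index
   0x70.  The representable magnitudes therefore run from 16 * 2^-7 = 0.125
   up to 31 * 2^0 = 31.  */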
12070 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12071 fconst[sd] instruction, or -1 if X isn't suitable. */
12072 static int
12073 vfp3_const_double_index (rtx x)
12075 REAL_VALUE_TYPE r, m;
12076 int sign, exponent;
12077 unsigned HOST_WIDE_INT mantissa, mant_hi;
12078 unsigned HOST_WIDE_INT mask;
12079 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12080 bool fail;
12082 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12083 return -1;
12085 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12087 /* We can't represent these things, so detect them first. */
12088 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12089 return -1;
12091 /* Extract sign, exponent and mantissa. */
12092 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12093 r = real_value_abs (&r);
12094 exponent = REAL_EXP (&r);
12095 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12096 highest (sign) bit, with a fixed binary point at bit point_pos.
12097 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12098 bits for the mantissa, this may fail (low bits would be lost). */
12099 real_ldexp (&m, &r, point_pos - exponent);
12100 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12101 mantissa = w.elt (0);
12102 mant_hi = w.elt (1);
12104 /* If there are bits set in the low part of the mantissa, we can't
12105 represent this value. */
12106 if (mantissa != 0)
12107 return -1;
12109 /* Now make it so that mantissa contains the most-significant bits, and move
12110 the point_pos to indicate that the least-significant bits have been
12111 discarded. */
12112 point_pos -= HOST_BITS_PER_WIDE_INT;
12113 mantissa = mant_hi;
12115 /* We can permit four significant bits of mantissa only, plus a high bit
12116 which is always 1. */
12117 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12118 if ((mantissa & mask) != 0)
12119 return -1;
12121 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12122 mantissa >>= point_pos - 5;
12124 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12125 floating-point immediate zero with Neon using an integer-zero load, but
12126 that case is handled elsewhere.) */
12127 if (mantissa == 0)
12128 return -1;
12130 gcc_assert (mantissa >= 16 && mantissa <= 31);
12132 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12133 normalized significands are in the range [1, 2). (Our mantissa is shifted
12134 left 4 places at this point relative to normalized IEEE754 values). GCC
12135 internally uses [0.5, 1) (see real.c), so the exponent returned from
12136 REAL_EXP must be altered. */
12137 exponent = 5 - exponent;
12139 if (exponent < 0 || exponent > 7)
12140 return -1;
12142 /* Sign, mantissa and exponent are now in the correct form to plug into the
12143 formula described in the comment above. */
12144 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12147 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12149 vfp3_const_double_rtx (rtx x)
12151 if (!TARGET_VFP3)
12152 return 0;
12154 return vfp3_const_double_index (x) != -1;
12157 /* Recognize immediates which can be used in various Neon instructions. Legal
12158 immediates are described by the following table (for VMVN variants, the
12159 bitwise inverse of the constant shown is recognized. In either case, VMOV
12160 is output and the correct instruction to use for a given constant is chosen
12161 by the assembler). The constant shown is replicated across all elements of
12162 the destination vector.
12164 insn elems variant constant (binary)
12165 ---- ----- ------- -----------------
12166 vmov i32 0 00000000 00000000 00000000 abcdefgh
12167 vmov i32 1 00000000 00000000 abcdefgh 00000000
12168 vmov i32 2 00000000 abcdefgh 00000000 00000000
12169 vmov i32 3 abcdefgh 00000000 00000000 00000000
12170 vmov i16 4 00000000 abcdefgh
12171 vmov i16 5 abcdefgh 00000000
12172 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12173 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12174 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12175 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12176 vmvn i16 10 00000000 abcdefgh
12177 vmvn i16 11 abcdefgh 00000000
12178 vmov i32 12 00000000 00000000 abcdefgh 11111111
12179 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12180 vmov i32 14 00000000 abcdefgh 11111111 11111111
12181 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12182 vmov i8 16 abcdefgh
12183 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12184 eeeeeeee ffffffff gggggggg hhhhhhhh
12185 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12186 vmov f32 19 00000000 00000000 00000000 00000000
12188 For case 18, B = !b. Representable values are exactly those accepted by
12189 vfp3_const_double_index, but are output as floating-point numbers rather
12190 than indices.
12192 For case 19, we will change it to vmov.i32 when assembling.
12194 Variants 0-5 (inclusive) may also be used as immediates for the second
12195 operand of VORR/VBIC instructions.
12197 The INVERSE argument causes the bitwise inverse of the given operand to be
12198 recognized instead (used for recognizing legal immediates for the VAND/VORN
12199 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12200 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12201 output, rather than the real insns vbic/vorr).
12203 INVERSE makes no difference to the recognition of float vectors.
12205 The return value is the variant of immediate as shown in the above table, or
12206 -1 if the given value doesn't match any of the listed patterns.
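/* A small example of the table above: a V4SImode CONST_VECTOR whose elements
   are all 0x000000ab splats to the byte pattern ab 00 00 00 repeated, so it
   is recognized as variant 0 with *ELEMENTWIDTH = 32 and *MODCONST = 0xab
   (a vmov.i32).  */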
12208 static int
12209 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12210 rtx *modconst, int *elementwidth)
12212 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12213 matches = 1; \
12214 for (i = 0; i < idx; i += (STRIDE)) \
12215 if (!(TEST)) \
12216 matches = 0; \
12217 if (matches) \
12219 immtype = (CLASS); \
12220 elsize = (ELSIZE); \
12221 break; \
12224 unsigned int i, elsize = 0, idx = 0, n_elts;
12225 unsigned int innersize;
12226 unsigned char bytes[16];
12227 int immtype = -1, matches;
12228 unsigned int invmask = inverse ? 0xff : 0;
12229 bool vector = GET_CODE (op) == CONST_VECTOR;
12231 if (vector)
12233 n_elts = CONST_VECTOR_NUNITS (op);
12234 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12236 else
12238 n_elts = 1;
12239 if (mode == VOIDmode)
12240 mode = DImode;
12241 innersize = GET_MODE_SIZE (mode);
12244 /* Vectors of float constants. */
12245 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12247 rtx el0 = CONST_VECTOR_ELT (op, 0);
12248 REAL_VALUE_TYPE r0;
12250 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12251 return -1;
12253 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12255 for (i = 1; i < n_elts; i++)
12257 rtx elt = CONST_VECTOR_ELT (op, i);
12258 REAL_VALUE_TYPE re;
12260 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12262 if (!REAL_VALUES_EQUAL (r0, re))
12263 return -1;
12266 if (modconst)
12267 *modconst = CONST_VECTOR_ELT (op, 0);
12269 if (elementwidth)
12270 *elementwidth = 0;
12272 if (el0 == CONST0_RTX (GET_MODE (el0)))
12273 return 19;
12274 else
12275 return 18;
12278 /* Splat vector constant out into a byte vector. */
12279 for (i = 0; i < n_elts; i++)
12281 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12282 unsigned HOST_WIDE_INT elpart;
12283 unsigned int part, parts;
12285 if (CONST_INT_P (el))
12287 elpart = INTVAL (el);
12288 parts = 1;
12290 else if (CONST_DOUBLE_P (el))
12292 elpart = CONST_DOUBLE_LOW (el);
12293 parts = 2;
12295 else
12296 gcc_unreachable ();
12298 for (part = 0; part < parts; part++)
12300 unsigned int byte;
12301 for (byte = 0; byte < innersize; byte++)
12303 bytes[idx++] = (elpart & 0xff) ^ invmask;
12304 elpart >>= BITS_PER_UNIT;
12306 if (CONST_DOUBLE_P (el))
12307 elpart = CONST_DOUBLE_HIGH (el);
12311 /* Sanity check. */
12312 gcc_assert (idx == GET_MODE_SIZE (mode));
12316 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12317 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12319 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12320 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12322 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12323 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12325 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12326 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12328 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12330 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12332 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12335 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12336 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12338 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12339 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12341 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12342 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12344 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12346 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12348 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12351 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12352 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12354 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12357 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12360 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12362 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12363 && bytes[i] == bytes[(i + 8) % idx]);
12365 while (0);
12367 if (immtype == -1)
12368 return -1;
12370 if (elementwidth)
12371 *elementwidth = elsize;
12373 if (modconst)
12375 unsigned HOST_WIDE_INT imm = 0;
12377 /* Un-invert bytes of recognized vector, if necessary. */
12378 if (invmask != 0)
12379 for (i = 0; i < idx; i++)
12380 bytes[i] ^= invmask;
12382 if (immtype == 17)
12384 /* FIXME: Broken on 32-bit H_W_I hosts. */
12385 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12387 for (i = 0; i < 8; i++)
12388 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12389 << (i * BITS_PER_UNIT);
12391 *modconst = GEN_INT (imm);
12393 else
12395 unsigned HOST_WIDE_INT imm = 0;
12397 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12398 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12400 *modconst = GEN_INT (imm);
12404 return immtype;
12405 #undef CHECK
12408 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12409 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12410 float elements), and a modified constant (whatever should be output for a
12411 VMOV) in *MODCONST. */
12414 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12415 rtx *modconst, int *elementwidth)
12417 rtx tmpconst;
12418 int tmpwidth;
12419 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12421 if (retval == -1)
12422 return 0;
12424 if (modconst)
12425 *modconst = tmpconst;
12427 if (elementwidth)
12428 *elementwidth = tmpwidth;
12430 return 1;
12433 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12434 the immediate is valid, write a constant suitable for using as an operand
12435 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12436 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12439 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12440 rtx *modconst, int *elementwidth)
12442 rtx tmpconst;
12443 int tmpwidth;
12444 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12446 if (retval < 0 || retval > 5)
12447 return 0;
12449 if (modconst)
12450 *modconst = tmpconst;
12452 if (elementwidth)
12453 *elementwidth = tmpwidth;
12455 return 1;
12458 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12459 the immediate is valid, write a constant suitable for using as an operand
12460 to VSHR/VSHL to *MODCONST and the corresponding element width to
12461 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12462 which have different limitations. */
12465 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12466 rtx *modconst, int *elementwidth,
12467 bool isleftshift)
12469 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12470 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12471 unsigned HOST_WIDE_INT last_elt = 0;
12472 unsigned HOST_WIDE_INT maxshift;
12474 /* Split vector constant out into a byte vector. */
12475 for (i = 0; i < n_elts; i++)
12477 rtx el = CONST_VECTOR_ELT (op, i);
12478 unsigned HOST_WIDE_INT elpart;
12480 if (CONST_INT_P (el))
12481 elpart = INTVAL (el);
12482 else if (CONST_DOUBLE_P (el))
12483 return 0;
12484 else
12485 gcc_unreachable ();
12487 if (i != 0 && elpart != last_elt)
12488 return 0;
12490 last_elt = elpart;
12493 /* Shift less than element size. */
12494 maxshift = innersize * 8;
12496 if (isleftshift)
12498 /* Left shift immediate value can be from 0 to <size>-1. */
12499 if (last_elt >= maxshift)
12500 return 0;
12502 else
12504 /* Right shift immediate value can be from 1 to <size>. */
12505 if (last_elt == 0 || last_elt > maxshift)
12506 return 0;
12509 if (elementwidth)
12510 *elementwidth = innersize * 8;
12512 if (modconst)
12513 *modconst = CONST_VECTOR_ELT (op, 0);
12515 return 1;
12518 /* Return a string suitable for output of Neon immediate logic operation
12519 MNEM. */
12521 char *
12522 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12523 int inverse, int quad)
12525 int width, is_valid;
12526 static char templ[40];
12528 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12530 gcc_assert (is_valid != 0);
12532 if (quad)
12533 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12534 else
12535 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12537 return templ;
12540 /* Return a string suitable for output of Neon immediate shift operation
12541 (VSHR or VSHL) MNEM. */
12543 char *
12544 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12545 machine_mode mode, int quad,
12546 bool isleftshift)
12548 int width, is_valid;
12549 static char templ[40];
12551 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12552 gcc_assert (is_valid != 0);
12554 if (quad)
12555 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12556 else
12557 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12559 return templ;
12562 /* Output a sequence of pairwise operations to implement a reduction.
12563 NOTE: We do "too much work" here, because pairwise operations work on two
12564 registers-worth of operands in one go. Unfortunately we can't exploit those
12565 extra calculations to do the full operation in fewer steps, I don't think.
12566 Although all vector elements of the result but the first are ignored, we
12567 actually calculate the same result in each of the elements. An alternative
12568 such as initially loading a vector with zero to use as each of the second
12569 operands would use up an additional register and take an extra instruction,
12570 for no particular gain. */
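/* Schematically, reducing a four-element vector {a, b, c, d} with a pairwise
   add takes two steps: the first produces {a+b, c+d, a+b, c+d}, the second
   {a+b+c+d, ...}; only element 0 of the final result is then used.  */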
12572 void
12573 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12574 rtx (*reduc) (rtx, rtx, rtx))
12576 machine_mode inner = GET_MODE_INNER (mode);
12577 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12578 rtx tmpsum = op1;
12580 for (i = parts / 2; i >= 1; i /= 2)
12582 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12583 emit_insn (reduc (dest, tmpsum, tmpsum));
12584 tmpsum = dest;
12588 /* If VALS is a vector constant that can be loaded into a register
12589 using VDUP, generate instructions to do so and return an RTX to
12590 assign to the register. Otherwise return NULL_RTX. */
12592 static rtx
12593 neon_vdup_constant (rtx vals)
12595 machine_mode mode = GET_MODE (vals);
12596 machine_mode inner_mode = GET_MODE_INNER (mode);
12597 int n_elts = GET_MODE_NUNITS (mode);
12598 bool all_same = true;
12599 rtx x;
12600 int i;
12602 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12603 return NULL_RTX;
12605 for (i = 0; i < n_elts; ++i)
12607 x = XVECEXP (vals, 0, i);
12608 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12609 all_same = false;
12612 if (!all_same)
12613 /* The elements are not all the same. We could handle repeating
12614 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12615 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12616 vdup.i16). */
12617 return NULL_RTX;
12619 /* We can load this constant by using VDUP and a constant in a
12620 single ARM register. This will be cheaper than a vector
12621 load. */
12623 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12624 return gen_rtx_VEC_DUPLICATE (mode, x);
12627 /* Generate code to load VALS, which is a PARALLEL containing only
12628 constants (for vec_init) or CONST_VECTOR, efficiently into a
12629 register. Returns an RTX to copy into the register, or NULL_RTX
12630 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12633 neon_make_constant (rtx vals)
12635 machine_mode mode = GET_MODE (vals);
12636 rtx target;
12637 rtx const_vec = NULL_RTX;
12638 int n_elts = GET_MODE_NUNITS (mode);
12639 int n_const = 0;
12640 int i;
12642 if (GET_CODE (vals) == CONST_VECTOR)
12643 const_vec = vals;
12644 else if (GET_CODE (vals) == PARALLEL)
12646 /* A CONST_VECTOR must contain only CONST_INTs and
12647 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12648 Only store valid constants in a CONST_VECTOR. */
12649 for (i = 0; i < n_elts; ++i)
12651 rtx x = XVECEXP (vals, 0, i);
12652 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12653 n_const++;
12655 if (n_const == n_elts)
12656 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12658 else
12659 gcc_unreachable ();
12661 if (const_vec != NULL
12662 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12663 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12664 return const_vec;
12665 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12666 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12667 pipeline cycle; creating the constant takes one or two ARM
12668 pipeline cycles. */
12669 return target;
12670 else if (const_vec != NULL_RTX)
12671 /* Load from constant pool. On Cortex-A8 this takes two cycles
12672 (for either double or quad vectors). We can not take advantage
12673 of single-cycle VLD1 because we need a PC-relative addressing
12674 mode. */
12675 return const_vec;
12676 else
12677 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12678 We can not construct an initializer. */
12679 return NULL_RTX;
12682 /* Initialize vector TARGET to VALS. */
12684 void
12685 neon_expand_vector_init (rtx target, rtx vals)
12687 machine_mode mode = GET_MODE (target);
12688 machine_mode inner_mode = GET_MODE_INNER (mode);
12689 int n_elts = GET_MODE_NUNITS (mode);
12690 int n_var = 0, one_var = -1;
12691 bool all_same = true;
12692 rtx x, mem;
12693 int i;
12695 for (i = 0; i < n_elts; ++i)
12697 x = XVECEXP (vals, 0, i);
12698 if (!CONSTANT_P (x))
12699 ++n_var, one_var = i;
12701 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12702 all_same = false;
12705 if (n_var == 0)
12707 rtx constant = neon_make_constant (vals);
12708 if (constant != NULL_RTX)
12710 emit_move_insn (target, constant);
12711 return;
12715 /* Splat a single non-constant element if we can. */
12716 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12718 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12719 emit_insn (gen_rtx_SET (VOIDmode, target,
12720 gen_rtx_VEC_DUPLICATE (mode, x)));
12721 return;
12724 /* One field is non-constant. Load constant then overwrite varying
12725 field. This is more efficient than using the stack. */
12726 if (n_var == 1)
12728 rtx copy = copy_rtx (vals);
12729 rtx index = GEN_INT (one_var);
12731 /* Load constant part of vector, substitute neighboring value for
12732 varying element. */
12733 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12734 neon_expand_vector_init (target, copy);
12736 /* Insert variable. */
12737 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12738 switch (mode)
12740 case V8QImode:
12741 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12742 break;
12743 case V16QImode:
12744 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12745 break;
12746 case V4HImode:
12747 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12748 break;
12749 case V8HImode:
12750 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12751 break;
12752 case V2SImode:
12753 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12754 break;
12755 case V4SImode:
12756 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12757 break;
12758 case V2SFmode:
12759 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12760 break;
12761 case V4SFmode:
12762 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12763 break;
12764 case V2DImode:
12765 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12766 break;
12767 default:
12768 gcc_unreachable ();
12770 return;
12773 /* Construct the vector in memory one field at a time
12774 and load the whole vector. */
12775 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12776 for (i = 0; i < n_elts; i++)
12777 emit_move_insn (adjust_address_nv (mem, inner_mode,
12778 i * GET_MODE_SIZE (inner_mode)),
12779 XVECEXP (vals, 0, i));
12780 emit_move_insn (target, mem);
12783 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12784 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12785 reported source locations are bogus. */
12787 static void
12788 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12789 const char *err)
12791 HOST_WIDE_INT lane;
12793 gcc_assert (CONST_INT_P (operand));
12795 lane = INTVAL (operand);
12797 if (lane < low || lane >= high)
12798 error (err);
12801 /* Bounds-check lanes. */
12803 void
12804 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12806 bounds_check (operand, low, high, "lane out of range");
12809 /* Bounds-check constants. */
12811 void
12812 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12814 bounds_check (operand, low, high, "constant out of range");
12817 HOST_WIDE_INT
12818 neon_element_bits (machine_mode mode)
12820 if (mode == DImode)
12821 return GET_MODE_BITSIZE (mode);
12822 else
12823 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12827 /* Predicates for `match_operand' and `match_operator'. */
12829 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12830 WB is true if full writeback address modes are allowed and is false
12831 if limited writeback address modes (POST_INC and PRE_DEC) are
12832 allowed. */
12835 arm_coproc_mem_operand (rtx op, bool wb)
12837 rtx ind;
12839 /* Reject eliminable registers. */
12840 if (! (reload_in_progress || reload_completed || lra_in_progress)
12841 && ( reg_mentioned_p (frame_pointer_rtx, op)
12842 || reg_mentioned_p (arg_pointer_rtx, op)
12843 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12844 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12845 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12846 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12847 return FALSE;
12849 /* Constants are converted into offsets from labels. */
12850 if (!MEM_P (op))
12851 return FALSE;
12853 ind = XEXP (op, 0);
12855 if (reload_completed
12856 && (GET_CODE (ind) == LABEL_REF
12857 || (GET_CODE (ind) == CONST
12858 && GET_CODE (XEXP (ind, 0)) == PLUS
12859 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12860 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12861 return TRUE;
12863 /* Match: (mem (reg)). */
12864 if (REG_P (ind))
12865 return arm_address_register_rtx_p (ind, 0);
12867 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12868 acceptable in any case (subject to verification by
12869 arm_address_register_rtx_p). We need WB to be true to accept
12870 PRE_INC and POST_DEC. */
12871 if (GET_CODE (ind) == POST_INC
12872 || GET_CODE (ind) == PRE_DEC
12873 || (wb
12874 && (GET_CODE (ind) == PRE_INC
12875 || GET_CODE (ind) == POST_DEC)))
12876 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12878 if (wb
12879 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12880 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12881 && GET_CODE (XEXP (ind, 1)) == PLUS
12882 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12883 ind = XEXP (ind, 1);
12885 /* Match:
12886 (plus (reg)
12887 (const)). */
12888 if (GET_CODE (ind) == PLUS
12889 && REG_P (XEXP (ind, 0))
12890 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12891 && CONST_INT_P (XEXP (ind, 1))
12892 && INTVAL (XEXP (ind, 1)) > -1024
12893 && INTVAL (XEXP (ind, 1)) < 1024
12894 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12895 return TRUE;
12897 return FALSE;
12900 /* Return TRUE if OP is a memory operand which we can load or store a vector
12901 to/from. TYPE is one of the following values:
12902 0 - Vector load/store (vldr)
12903 1 - Core registers (ldm)
12904 2 - Element/structure loads (vld1)
12907 neon_vector_mem_operand (rtx op, int type, bool strict)
12909 rtx ind;
12911 /* Reject eliminable registers. */
12912 if (! (reload_in_progress || reload_completed)
12913 && ( reg_mentioned_p (frame_pointer_rtx, op)
12914 || reg_mentioned_p (arg_pointer_rtx, op)
12915 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12916 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12917 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12918 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12919 return !strict;
12921 /* Constants are converted into offsets from labels. */
12922 if (!MEM_P (op))
12923 return FALSE;
12925 ind = XEXP (op, 0);
12927 if (reload_completed
12928 && (GET_CODE (ind) == LABEL_REF
12929 || (GET_CODE (ind) == CONST
12930 && GET_CODE (XEXP (ind, 0)) == PLUS
12931 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12932 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12933 return TRUE;
12935 /* Match: (mem (reg)). */
12936 if (REG_P (ind))
12937 return arm_address_register_rtx_p (ind, 0);
12939 /* Allow post-increment with Neon registers. */
12940 if ((type != 1 && GET_CODE (ind) == POST_INC)
12941 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12942 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12944 /* Allow post-increment by register for VLDn. */
12945 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12946 && GET_CODE (XEXP (ind, 1)) == PLUS
12947 && REG_P (XEXP (XEXP (ind, 1), 1)))
12948 return true;
12950 /* Match:
12951 (plus (reg)
12952 (const)). */
12953 if (type == 0
12954 && GET_CODE (ind) == PLUS
12955 && REG_P (XEXP (ind, 0))
12956 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12957 && CONST_INT_P (XEXP (ind, 1))
12958 && INTVAL (XEXP (ind, 1)) > -1024
12959 /* For quad modes, we restrict the constant offset to be slightly less
12960 than what the instruction format permits. We have no such constraint
12961 on double mode offsets. (This must match arm_legitimate_index_p.) */
12962 && (INTVAL (XEXP (ind, 1))
12963 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12964 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12965 return TRUE;
12967 return FALSE;
12970 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12971 type. */
12973 neon_struct_mem_operand (rtx op)
12975 rtx ind;
12977 /* Reject eliminable registers. */
12978 if (! (reload_in_progress || reload_completed)
12979 && ( reg_mentioned_p (frame_pointer_rtx, op)
12980 || reg_mentioned_p (arg_pointer_rtx, op)
12981 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12982 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12983 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12984 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12985 return FALSE;
12987 /* Constants are converted into offsets from labels. */
12988 if (!MEM_P (op))
12989 return FALSE;
12991 ind = XEXP (op, 0);
12993 if (reload_completed
12994 && (GET_CODE (ind) == LABEL_REF
12995 || (GET_CODE (ind) == CONST
12996 && GET_CODE (XEXP (ind, 0)) == PLUS
12997 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12998 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12999 return TRUE;
13001 /* Match: (mem (reg)). */
13002 if (REG_P (ind))
13003 return arm_address_register_rtx_p (ind, 0);
13005 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13006 if (GET_CODE (ind) == POST_INC
13007 || GET_CODE (ind) == PRE_DEC)
13008 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13010 return FALSE;
13013 /* Return true if X is a register that will be eliminated later on. */
13015 arm_eliminable_register (rtx x)
13017 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13018 || REGNO (x) == ARG_POINTER_REGNUM
13019 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13020 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13023 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13024 coprocessor registers. Otherwise return NO_REGS. */
13026 enum reg_class
13027 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13029 if (mode == HFmode)
13031 if (!TARGET_NEON_FP16)
13032 return GENERAL_REGS;
13033 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13034 return NO_REGS;
13035 return GENERAL_REGS;
13038 /* The neon move patterns handle all legitimate vector and struct
13039 addresses. */
13040 if (TARGET_NEON
13041 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13042 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13043 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13044 || VALID_NEON_STRUCT_MODE (mode)))
13045 return NO_REGS;
13047 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13048 return NO_REGS;
13050 return GENERAL_REGS;
13053 /* Values which must be returned in the most-significant end of the return
13054 register. */
13056 static bool
13057 arm_return_in_msb (const_tree valtype)
13059 return (TARGET_AAPCS_BASED
13060 && BYTES_BIG_ENDIAN
13061 && (AGGREGATE_TYPE_P (valtype)
13062 || TREE_CODE (valtype) == COMPLEX_TYPE
13063 || FIXED_POINT_TYPE_P (valtype)));
13066 /* Return TRUE if X references a SYMBOL_REF. */
13068 symbol_mentioned_p (rtx x)
13070 const char * fmt;
13071 int i;
13073 if (GET_CODE (x) == SYMBOL_REF)
13074 return 1;
13076 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13077 are constant offsets, not symbols. */
13078 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13079 return 0;
13081 fmt = GET_RTX_FORMAT (GET_CODE (x));
13083 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13085 if (fmt[i] == 'E')
13087 int j;
13089 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13090 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13091 return 1;
13093 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13094 return 1;
13097 return 0;
13100 /* Return TRUE if X references a LABEL_REF. */
13102 label_mentioned_p (rtx x)
13104 const char * fmt;
13105 int i;
13107 if (GET_CODE (x) == LABEL_REF)
13108 return 1;
13110 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13111 instruction, but they are constant offsets, not symbols. */
13112 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13113 return 0;
13115 fmt = GET_RTX_FORMAT (GET_CODE (x));
13116 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13118 if (fmt[i] == 'E')
13120 int j;
13122 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13123 if (label_mentioned_p (XVECEXP (x, i, j)))
13124 return 1;
13126 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13127 return 1;
13130 return 0;
13134 tls_mentioned_p (rtx x)
13136 switch (GET_CODE (x))
13138 case CONST:
13139 return tls_mentioned_p (XEXP (x, 0));
13141 case UNSPEC:
13142 if (XINT (x, 1) == UNSPEC_TLS)
13143 return 1;
13145 default:
13146 return 0;
13150 /* Must not copy any rtx that uses a pc-relative address. */
13152 static bool
13153 arm_cannot_copy_insn_p (rtx_insn *insn)
13155 /* The tls call insn cannot be copied, as it is paired with a data
13156 word. */
13157 if (recog_memoized (insn) == CODE_FOR_tlscall)
13158 return true;
13160 subrtx_iterator::array_type array;
13161 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13163 const_rtx x = *iter;
13164 if (GET_CODE (x) == UNSPEC
13165 && (XINT (x, 1) == UNSPEC_PIC_BASE
13166 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13167 return true;
13169 return false;
13172 enum rtx_code
13173 minmax_code (rtx x)
13175 enum rtx_code code = GET_CODE (x);
13177 switch (code)
13179 case SMAX:
13180 return GE;
13181 case SMIN:
13182 return LE;
13183 case UMIN:
13184 return LEU;
13185 case UMAX:
13186 return GEU;
13187 default:
13188 gcc_unreachable ();
13192 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13194 bool
13195 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13196 int *mask, bool *signed_sat)
13198 /* The high bound must be a power of two minus one. */
13199 int log = exact_log2 (INTVAL (hi_bound) + 1);
13200 if (log == -1)
13201 return false;
13203 /* The low bound is either zero (for usat) or one less than the
13204 negation of the high bound (for ssat). */
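  /* For instance, the bounds [0, 255] give the unsigned case with *MASK = 8
     (usat #8), while [-128, 127] gives the signed case, also with *MASK = 8
     (ssat #8).  */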
13205 if (INTVAL (lo_bound) == 0)
13207 if (mask)
13208 *mask = log;
13209 if (signed_sat)
13210 *signed_sat = false;
13212 return true;
13215 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13217 if (mask)
13218 *mask = log + 1;
13219 if (signed_sat)
13220 *signed_sat = true;
13222 return true;
13225 return false;
13228 /* Return 1 if memory locations are adjacent. */
13230 adjacent_mem_locations (rtx a, rtx b)
13232 /* We don't guarantee to preserve the order of these memory refs. */
13233 if (volatile_refs_p (a) || volatile_refs_p (b))
13234 return 0;
13236 if ((REG_P (XEXP (a, 0))
13237 || (GET_CODE (XEXP (a, 0)) == PLUS
13238 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13239 && (REG_P (XEXP (b, 0))
13240 || (GET_CODE (XEXP (b, 0)) == PLUS
13241 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13243 HOST_WIDE_INT val0 = 0, val1 = 0;
13244 rtx reg0, reg1;
13245 int val_diff;
13247 if (GET_CODE (XEXP (a, 0)) == PLUS)
13249 reg0 = XEXP (XEXP (a, 0), 0);
13250 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13252 else
13253 reg0 = XEXP (a, 0);
13255 if (GET_CODE (XEXP (b, 0)) == PLUS)
13257 reg1 = XEXP (XEXP (b, 0), 0);
13258 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13260 else
13261 reg1 = XEXP (b, 0);
13263 /* Don't accept any offset that will require multiple
13264 instructions to handle, since this would cause the
13265 arith_adjacentmem pattern to output an overlong sequence. */
13266 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13267 return 0;
13269 /* Don't allow an eliminable register: register elimination can make
13270 the offset too large. */
13271 if (arm_eliminable_register (reg0))
13272 return 0;
13274 val_diff = val1 - val0;
13276 if (arm_ld_sched)
13278 /* If the target has load delay slots, then there's no benefit
13279 to using an ldm instruction unless the offset is zero and
13280 we are optimizing for size. */
13281 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13282 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13283 && (val_diff == 4 || val_diff == -4));
13286 return ((REGNO (reg0) == REGNO (reg1))
13287 && (val_diff == 4 || val_diff == -4));
13290 return 0;
13293 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13294 for load operations, false for store operations. CONSECUTIVE is true
13295 if the register numbers in the operation must be consecutive in the register
13296 bank. RETURN_PC is true if the value is to be loaded into the PC.
13297 The pattern we are trying to match for load is:
13298 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13299 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13302 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13304 where
13305 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13306 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13307 3. If consecutive is TRUE, then for kth register being loaded,
13308 REGNO (R_dk) = REGNO (R_d0) + k.
13309 The pattern for store is similar. */
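/* As a schematic example, a two-register LDMIA in SImode matches the pattern
   above as:
     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])
   i.e. offset 0 for the first element and offset 4 (= <reg_increment>) for
   the second, with ascending destination register numbers.  */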
13310 bool
13311 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13312 bool consecutive, bool return_pc)
13314 HOST_WIDE_INT count = XVECLEN (op, 0);
13315 rtx reg, mem, addr;
13316 unsigned regno;
13317 unsigned first_regno;
13318 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13319 rtx elt;
13320 bool addr_reg_in_reglist = false;
13321 bool update = false;
13322 int reg_increment;
13323 int offset_adj;
13324 int regs_per_val;
13326 /* If not in SImode, then registers must be consecutive
13327 (e.g., VLDM instructions for DFmode). */
13328 gcc_assert ((mode == SImode) || consecutive);
13329 /* Setting return_pc for stores is illegal. */
13330 gcc_assert (!return_pc || load);
13332 /* Set up the increments and the regs per val based on the mode. */
13333 reg_increment = GET_MODE_SIZE (mode);
13334 regs_per_val = reg_increment / 4;
13335 offset_adj = return_pc ? 1 : 0;
13337 if (count <= 1
13338 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13339 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13340 return false;
13342 /* Check if this is a write-back. */
13343 elt = XVECEXP (op, 0, offset_adj);
13344 if (GET_CODE (SET_SRC (elt)) == PLUS)
13346 i++;
13347 base = 1;
13348 update = true;
13350 /* The offset adjustment must be the number of registers being
13351 popped times the size of a single register. */
13352 if (!REG_P (SET_DEST (elt))
13353 || !REG_P (XEXP (SET_SRC (elt), 0))
13354 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13355 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13356 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13357 ((count - 1 - offset_adj) * reg_increment))
13358 return false;
13361 i = i + offset_adj;
13362 base = base + offset_adj;
13363 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13364 success depends on the type: VLDM can do just one reg,
13365 LDM must do at least two. */
13366 if ((count <= i) && (mode == SImode))
13367 return false;
13369 elt = XVECEXP (op, 0, i - 1);
13370 if (GET_CODE (elt) != SET)
13371 return false;
13373 if (load)
13375 reg = SET_DEST (elt);
13376 mem = SET_SRC (elt);
13378 else
13380 reg = SET_SRC (elt);
13381 mem = SET_DEST (elt);
13384 if (!REG_P (reg) || !MEM_P (mem))
13385 return false;
13387 regno = REGNO (reg);
13388 first_regno = regno;
13389 addr = XEXP (mem, 0);
13390 if (GET_CODE (addr) == PLUS)
13392 if (!CONST_INT_P (XEXP (addr, 1)))
13393 return false;
13395 offset = INTVAL (XEXP (addr, 1));
13396 addr = XEXP (addr, 0);
13399 if (!REG_P (addr))
13400 return false;
13402 /* Don't allow SP to be loaded unless it is also the base register. It
13403 guarantees that SP is reset correctly when an LDM instruction
13404 is interrupted. Otherwise, we might end up with a corrupt stack. */
13405 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13406 return false;
13408 for (; i < count; i++)
13410 elt = XVECEXP (op, 0, i);
13411 if (GET_CODE (elt) != SET)
13412 return false;
13414 if (load)
13416 reg = SET_DEST (elt);
13417 mem = SET_SRC (elt);
13419 else
13421 reg = SET_SRC (elt);
13422 mem = SET_DEST (elt);
13425 if (!REG_P (reg)
13426 || GET_MODE (reg) != mode
13427 || REGNO (reg) <= regno
13428 || (consecutive
13429 && (REGNO (reg) !=
13430 (unsigned int) (first_regno + regs_per_val * (i - base))))
13431 /* Don't allow SP to be loaded unless it is also the base register. It
13432 guarantees that SP is reset correctly when an LDM instruction
13433 is interrupted. Otherwise, we might end up with a corrupt stack. */
13434 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13435 || !MEM_P (mem)
13436 || GET_MODE (mem) != mode
13437 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13438 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13439 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13440 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13441 offset + (i - base) * reg_increment))
13442 && (!REG_P (XEXP (mem, 0))
13443 || offset + (i - base) * reg_increment != 0)))
13444 return false;
13446 regno = REGNO (reg);
13447 if (regno == REGNO (addr))
13448 addr_reg_in_reglist = true;
13451 if (load)
13453 if (update && addr_reg_in_reglist)
13454 return false;
13456 /* For Thumb-1, address register is always modified - either by write-back
13457 or by explicit load. If the pattern does not describe an update,
13458 then the address register must be in the list of loaded registers. */
13459 if (TARGET_THUMB1)
13460 return update || addr_reg_in_reglist;
13463 return true;
13466 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13467 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13468 instruction. ADD_OFFSET is nonzero if the base address register needs
13469 to be modified with an add instruction before we can use it. */
13471 static bool
13472 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13473 int nops, HOST_WIDE_INT add_offset)
13475 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13476 if the offset isn't small enough. The reason 2 ldrs are faster
13477 is because these ARMs are able to do more than one cache access
13478 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13479 whilst the ARM8 has a double bandwidth cache. This means that
13480 these cores can do both an instruction fetch and a data fetch in
13481 a single cycle, so the trick of calculating the address into a
13482 scratch register (one of the result regs) and then doing a load
13483 multiple actually becomes slower (and no smaller in code size).
13484 That is the transformation
13486 ldr rd1, [rbase + offset]
13487 ldr rd2, [rbase + offset + 4]
13491 add rd1, rbase, offset
13492 ldmia rd1, {rd1, rd2}
13494 produces worse code -- '3 cycles + any stalls on rd2' instead of
13495 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13496 access per cycle, the first sequence could never complete in less
13497 than 6 cycles, whereas the ldm sequence would only take 5 and
13498 would make better use of sequential accesses if not hitting the
13499 cache.
13501 We cheat here and test 'arm_ld_sched' which we currently know to
13502 only be true for the ARM8, ARM9 and StrongARM. If this ever
13503 changes, then the test below needs to be reworked. */
13504 if (nops == 2 && arm_ld_sched && add_offset != 0)
13505 return false;
13507 /* XScale has load-store double instructions, but they have stricter
13508 alignment requirements than load-store multiple, so we cannot
13509 use them.
13511 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13512 the pipeline until completion.
13514 NREGS CYCLES
13515 1 3
13516 2 4
13517 3 5
13518 4 6
13520 An ldr instruction takes 1-3 cycles, but does not block the
13521 pipeline.
13523 NREGS CYCLES
13524 1 1-3
13525 2 2-6
13526 3 3-9
13527 4 4-12
13529 Best case ldr will always win. However, the more ldr instructions
13530 we issue, the less likely we are to be able to schedule them well.
13531 Using ldr instructions also increases code size.
13533 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13534 for counts of 3 or 4 regs. */
13535 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13536 return false;
13537 return true;
13540 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13541 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13542 an array ORDER which describes the sequence to use when accessing the
13543 offsets that produces an ascending order. In this sequence, each
13544 offset must be larger by exactly 4 than the previous one. ORDER[0]
13545 must have been filled in with the lowest offset by the caller.
13546 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13547 we use to verify that ORDER produces an ascending order of registers.
13548 Return true if it was possible to construct such an order, false if
13549 not. */
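/* For example, with NOPS = 4, UNSORTED_OFFSETS = {8, 0, 4, 12} and
   ORDER[0] = 1 (the index of the lowest offset), this fills in
   ORDER = {1, 2, 0, 3}; it fails if any offset is not exactly 4 greater
   than its predecessor, or if more than one candidate matches a step.  */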
13551 static bool
13552 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13553 int *unsorted_regs)
13555 int i;
13556 for (i = 1; i < nops; i++)
13558 int j;
13560 order[i] = order[i - 1];
13561 for (j = 0; j < nops; j++)
13562 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13564 /* We must find exactly one offset that is higher than the
13565 previous one by 4. */
13566 if (order[i] != order[i - 1])
13567 return false;
13568 order[i] = j;
13570 if (order[i] == order[i - 1])
13571 return false;
13572 /* The register numbers must be ascending. */
13573 if (unsorted_regs != NULL
13574 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13575 return false;
13577 return true;
13580 /* Used to determine in a peephole whether a sequence of load
13581 instructions can be changed into a load-multiple instruction.
13582 NOPS is the number of separate load instructions we are examining. The
13583 first NOPS entries in OPERANDS are the destination registers, the
13584 next NOPS entries are memory operands. If this function is
13585 successful, *BASE is set to the common base register of the memory
13586 accesses; *LOAD_OFFSET is set to the first memory location's offset
13587 from that base register.
13588 REGS is an array filled in with the destination register numbers.
13589 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13590 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13591 the sequence of registers in REGS matches the loads from ascending memory
13592 locations, and the function verifies that the register numbers are
13593 themselves ascending. If CHECK_REGS is false, the register numbers
13594 are stored in the order they are found in the operands. */
13595 static int
13596 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13597 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13599 int unsorted_regs[MAX_LDM_STM_OPS];
13600 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13601 int order[MAX_LDM_STM_OPS];
13602 rtx base_reg_rtx = NULL;
13603 int base_reg = -1;
13604 int i, ldm_case;
13606 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13607 easily extended if required. */
13608 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13610 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13612 /* Loop over the operands and check that the memory references are
13613 suitable (i.e. immediate offsets from the same base register). At
13614 the same time, extract the target register, and the memory
13615 offsets. */
13616 for (i = 0; i < nops; i++)
13618 rtx reg;
13619 rtx offset;
13621 /* Convert a subreg of a mem into the mem itself. */
13622 if (GET_CODE (operands[nops + i]) == SUBREG)
13623 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13625 gcc_assert (MEM_P (operands[nops + i]));
13627 /* Don't reorder volatile memory references; it doesn't seem worth
13628 looking for the case where the order is ok anyway. */
13629 if (MEM_VOLATILE_P (operands[nops + i]))
13630 return 0;
13632 offset = const0_rtx;
13634 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13635 || (GET_CODE (reg) == SUBREG
13636 && REG_P (reg = SUBREG_REG (reg))))
13637 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13638 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13639 || (GET_CODE (reg) == SUBREG
13640 && REG_P (reg = SUBREG_REG (reg))))
13641 && (CONST_INT_P (offset
13642 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13644 if (i == 0)
13646 base_reg = REGNO (reg);
13647 base_reg_rtx = reg;
13648 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13649 return 0;
13651 else if (base_reg != (int) REGNO (reg))
13652 /* Not addressed from the same base register. */
13653 return 0;
13655 unsorted_regs[i] = (REG_P (operands[i])
13656 ? REGNO (operands[i])
13657 : REGNO (SUBREG_REG (operands[i])));
13659 /* If it isn't an integer register, or if it overwrites the
13660 base register but isn't the last insn in the list, then
13661 we can't do this. */
13662 if (unsorted_regs[i] < 0
13663 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13664 || unsorted_regs[i] > 14
13665 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13666 return 0;
13668 /* Don't allow SP to be loaded unless it is also the base
13669 register. It guarantees that SP is reset correctly when
13670 an LDM instruction is interrupted. Otherwise, we might
13671 end up with a corrupt stack. */
13672 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13673 return 0;
13675 unsorted_offsets[i] = INTVAL (offset);
13676 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13677 order[0] = i;
13679 else
13680 /* Not a suitable memory address. */
13681 return 0;
13684 /* All the useful information has now been extracted from the
13685 operands into unsorted_regs and unsorted_offsets; additionally,
13686 order[0] has been set to the lowest offset in the list. Sort
13687 the offsets into order, verifying that they are adjacent, and
13688 check that the register numbers are ascending. */
13689 if (!compute_offset_order (nops, unsorted_offsets, order,
13690 check_regs ? unsorted_regs : NULL))
13691 return 0;
13693 if (saved_order)
13694 memcpy (saved_order, order, sizeof order);
13696 if (base)
13698 *base = base_reg;
13700 for (i = 0; i < nops; i++)
13701 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13703 *load_offset = unsorted_offsets[order[0]];
13706 if (TARGET_THUMB1
13707 && !peep2_reg_dead_p (nops, base_reg_rtx))
13708 return 0;
13710 if (unsorted_offsets[order[0]] == 0)
13711 ldm_case = 1; /* ldmia */
13712 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13713 ldm_case = 2; /* ldmib */
13714 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13715 ldm_case = 3; /* ldmda */
13716 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13717 ldm_case = 4; /* ldmdb */
13718 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13719 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13720 ldm_case = 5;
13721 else
13722 return 0;
13724 if (!multiple_operation_profitable_p (false, nops,
13725 ldm_case == 5
13726 ? unsorted_offsets[order[0]] : 0))
13727 return 0;
13729 return ldm_case;
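/* Illustrative example (hypothetical peephole operands): four loads of
   r4, r5, r6, r7 from [rB], [rB, #4], [rB, #8] and [rB, #12] share the base
   register rB and give offsets {0, 4, 8, 12}, so the lowest offset is 0 and
   ldm_case 1 (ldmia) is returned; had the offsets started at #4 instead,
   case 2 (ldmib) would be chosen in ARM state.  */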
13732 /* Used to determine in a peephole whether a sequence of store instructions can
13733 be changed into a store-multiple instruction.
13734 NOPS is the number of separate store instructions we are examining.
13735 NOPS_TOTAL is the total number of instructions recognized by the peephole
13736 pattern.
13737 The first NOPS entries in OPERANDS are the source registers, the next
13738 NOPS entries are memory operands. If this function is successful, *BASE is
13739 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13740 to the first memory location's offset from that base register. REGS is an
13741 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13742 likewise filled with the corresponding rtx's.
13743 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13744 numbers to an ascending order of stores.
13745 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13746 from ascending memory locations, and the function verifies that the register
13747 numbers are themselves ascending. If CHECK_REGS is false, the register
13748 numbers are stored in the order they are found in the operands. */
13749 static int
13750 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13751 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13752 HOST_WIDE_INT *load_offset, bool check_regs)
13754 int unsorted_regs[MAX_LDM_STM_OPS];
13755 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13756 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13757 int order[MAX_LDM_STM_OPS];
13758 int base_reg = -1;
13759 rtx base_reg_rtx = NULL;
13760 int i, stm_case;
13762 /* Write back of base register is currently only supported for Thumb 1. */
13763 int base_writeback = TARGET_THUMB1;
13765 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13766 easily extended if required. */
13767 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13769 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13771 /* Loop over the operands and check that the memory references are
13772 suitable (i.e. immediate offsets from the same base register). At
13773 the same time, extract the target register, and the memory
13774 offsets. */
13775 for (i = 0; i < nops; i++)
13777 rtx reg;
13778 rtx offset;
13780 /* Convert a subreg of a mem into the mem itself. */
13781 if (GET_CODE (operands[nops + i]) == SUBREG)
13782 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13784 gcc_assert (MEM_P (operands[nops + i]));
13786 /* Don't reorder volatile memory references; it doesn't seem worth
13787 looking for the case where the order is ok anyway. */
13788 if (MEM_VOLATILE_P (operands[nops + i]))
13789 return 0;
13791 offset = const0_rtx;
13793 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13794 || (GET_CODE (reg) == SUBREG
13795 && REG_P (reg = SUBREG_REG (reg))))
13796 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13797 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13798 || (GET_CODE (reg) == SUBREG
13799 && REG_P (reg = SUBREG_REG (reg))))
13800 && (CONST_INT_P (offset
13801 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13803 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13804 ? operands[i] : SUBREG_REG (operands[i]));
13805 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13807 if (i == 0)
13809 base_reg = REGNO (reg);
13810 base_reg_rtx = reg;
13811 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13812 return 0;
13814 else if (base_reg != (int) REGNO (reg))
13815 /* Not addressed from the same base register. */
13816 return 0;
13818 /* If it isn't an integer register, then we can't do this. */
13819 if (unsorted_regs[i] < 0
13820 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13821 /* The effects are unpredictable if the base register is
13822 both updated and stored. */
13823 || (base_writeback && unsorted_regs[i] == base_reg)
13824 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13825 || unsorted_regs[i] > 14)
13826 return 0;
13828 unsorted_offsets[i] = INTVAL (offset);
13829 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13830 order[0] = i;
13832 else
13833 /* Not a suitable memory address. */
13834 return 0;
13837 /* All the useful information has now been extracted from the
13838 operands into unsorted_regs and unsorted_offsets; additionally,
13839 order[0] has been set to the lowest offset in the list. Sort
13840 the offsets into order, verifying that they are adjacent, and
13841 check that the register numbers are ascending. */
13842 if (!compute_offset_order (nops, unsorted_offsets, order,
13843 check_regs ? unsorted_regs : NULL))
13844 return 0;
13846 if (saved_order)
13847 memcpy (saved_order, order, sizeof order);
13849 if (base)
13851 *base = base_reg;
13853 for (i = 0; i < nops; i++)
13855 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13856 if (reg_rtxs)
13857 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13860 *load_offset = unsorted_offsets[order[0]];
13863 if (TARGET_THUMB1
13864 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13865 return 0;
13867 if (unsorted_offsets[order[0]] == 0)
13868 stm_case = 1; /* stmia */
13869 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13870 stm_case = 2; /* stmib */
13871 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13872 stm_case = 3; /* stmda */
13873 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13874 stm_case = 4; /* stmdb */
13875 else
13876 return 0;
13878 if (!multiple_operation_profitable_p (false, nops, 0))
13879 return 0;
13881 return stm_case;
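/* An illustrative pair of cases (hypothetical operands): stores covering
   [rB, #-12] up to [rB, #0] have a highest offset of 0 and select stm_case 3
   (stmda) in ARM state, while stores covering [rB, #-16] up to [rB, #-4]
   select stm_case 4 (stmdb) on any 32-bit core.  */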
13884 /* Routines for use in generating RTL. */
13886 /* Generate a load-multiple instruction. COUNT is the number of loads in
13887 the instruction; REGS and MEMS are arrays containing the operands.
13888 BASEREG is the base register to be used in addressing the memory operands.
13889 WBACK_OFFSET is nonzero if the instruction should update the base
13890 register. */
13892 static rtx
13893 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13894 HOST_WIDE_INT wback_offset)
13896 int i = 0, j;
13897 rtx result;
13899 if (!multiple_operation_profitable_p (false, count, 0))
13901 rtx seq;
13903 start_sequence ();
13905 for (i = 0; i < count; i++)
13906 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13908 if (wback_offset != 0)
13909 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13911 seq = get_insns ();
13912 end_sequence ();
13914 return seq;
13917 result = gen_rtx_PARALLEL (VOIDmode,
13918 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13919 if (wback_offset != 0)
13921 XVECEXP (result, 0, 0)
13922 = gen_rtx_SET (VOIDmode, basereg,
13923 plus_constant (Pmode, basereg, wback_offset));
13924 i = 1;
13925 count++;
13928 for (j = 0; i < count; i++, j++)
13929 XVECEXP (result, 0, i)
13930 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13932 return result;
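/* Schematically (a sketch rather than a literal dump), for COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 8 the result is roughly

     (parallel [(set (reg:SI rB) (plus:SI (reg:SI rB) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   i.e. the optional base-register update occupies element 0 and the register
   loads follow, matching the load-multiple patterns in the machine
   description.  */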
13935 /* Generate a store-multiple instruction. COUNT is the number of stores in
13936 the instruction; REGS and MEMS are arrays containing the operands.
13937 BASEREG is the base register to be used in addressing the memory operands.
13938 WBACK_OFFSET is nonzero if the instruction should update the base
13939 register. */
13941 static rtx
13942 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13943 HOST_WIDE_INT wback_offset)
13945 int i = 0, j;
13946 rtx result;
13948 if (GET_CODE (basereg) == PLUS)
13949 basereg = XEXP (basereg, 0);
13951 if (!multiple_operation_profitable_p (false, count, 0))
13953 rtx seq;
13955 start_sequence ();
13957 for (i = 0; i < count; i++)
13958 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13960 if (wback_offset != 0)
13961 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13963 seq = get_insns ();
13964 end_sequence ();
13966 return seq;
13969 result = gen_rtx_PARALLEL (VOIDmode,
13970 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13971 if (wback_offset != 0)
13973 XVECEXP (result, 0, 0)
13974 = gen_rtx_SET (VOIDmode, basereg,
13975 plus_constant (Pmode, basereg, wback_offset));
13976 i = 1;
13977 count++;
13980 for (j = 0; i < count; i++, j++)
13981 XVECEXP (result, 0, i)
13982 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13984 return result;
13987 /* Generate either a load-multiple or a store-multiple instruction. This
13988 function can be used in situations where we can start with a single MEM
13989 rtx and adjust its address upwards.
13990 COUNT is the number of operations in the instruction, not counting a
13991 possible update of the base register. REGS is an array containing the
13992 register operands.
13993 BASEREG is the base register to be used in addressing the memory operands,
13994 which are constructed from BASEMEM.
13995 WRITE_BACK specifies whether the generated instruction should include an
13996 update of the base register.
13997 OFFSETP is used to pass an offset to and from this function; this offset
13998 is not used when constructing the address (instead BASEMEM should have an
13999 appropriate offset in its address); it is used only for setting
14000 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14002 static rtx
14003 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14004 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14006 rtx mems[MAX_LDM_STM_OPS];
14007 HOST_WIDE_INT offset = *offsetp;
14008 int i;
14010 gcc_assert (count <= MAX_LDM_STM_OPS);
14012 if (GET_CODE (basereg) == PLUS)
14013 basereg = XEXP (basereg, 0);
14015 for (i = 0; i < count; i++)
14017 rtx addr = plus_constant (Pmode, basereg, i * 4);
14018 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14019 offset += 4;
14022 if (write_back)
14023 *offsetp = offset;
14025 if (is_load)
14026 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14027 write_back ? 4 * count : 0);
14028 else
14029 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14030 write_back ? 4 * count : 0);
rtx
14034 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14035 rtx basemem, HOST_WIDE_INT *offsetp)
14037 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14038 offsetp);
rtx
14042 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14043 rtx basemem, HOST_WIDE_INT *offsetp)
14045 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14046 offsetp);
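/* These wrappers are what the block-copy code further down uses; for
   example, arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase, &srcoffset)
   builds four SImode mems at src, src+4, src+8 and src+12 (taking attributes
   from SRCBASE), requests a write-back of 16 and advances *OFFSETP by 16.  */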
14049 /* Called from a peephole2 expander to turn a sequence of loads into an
14050 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14051 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14052 is true if we can reorder the registers because they are used commutatively
14053 subsequently.
14054 Returns true iff we could generate a new instruction. */
14056 bool
14057 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14059 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14060 rtx mems[MAX_LDM_STM_OPS];
14061 int i, j, base_reg;
14062 rtx base_reg_rtx;
14063 HOST_WIDE_INT offset;
14064 int write_back = FALSE;
14065 int ldm_case;
14066 rtx addr;
14068 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14069 &base_reg, &offset, !sort_regs);
14071 if (ldm_case == 0)
14072 return false;
14074 if (sort_regs)
14075 for (i = 0; i < nops - 1; i++)
14076 for (j = i + 1; j < nops; j++)
14077 if (regs[i] > regs[j])
14079 int t = regs[i];
14080 regs[i] = regs[j];
14081 regs[j] = t;
14083 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14085 if (TARGET_THUMB1)
14087 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14088 gcc_assert (ldm_case == 1 || ldm_case == 5);
14089 write_back = TRUE;
14092 if (ldm_case == 5)
14094 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14095 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14096 offset = 0;
14097 if (!TARGET_THUMB1)
14099 base_reg = regs[0];
14100 base_reg_rtx = newbase;
14104 for (i = 0; i < nops; i++)
14106 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14107 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14108 SImode, addr, 0);
14110 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14111 write_back ? offset + i * 4 : 0));
14112 return true;
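/* For instance (hypothetical operands), three loads of r4, r5, r6 from
   [rB, #64], [rB, #68] and [rB, #72] give ldm_case 5, so in ARM state the
   code above first emits "add r4, rB, #64" (NEWBASE is regs[0]) and then the
   load-multiple based on r4 with OFFSET reset to 0.  */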
14115 /* Called from a peephole2 expander to turn a sequence of stores into an
14116 STM instruction. OPERANDS are the operands found by the peephole matcher;
14117 NOPS indicates how many separate stores we are trying to combine.
14118 Returns true iff we could generate a new instruction. */
14120 bool
14121 gen_stm_seq (rtx *operands, int nops)
14123 int i;
14124 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14125 rtx mems[MAX_LDM_STM_OPS];
14126 int base_reg;
14127 rtx base_reg_rtx;
14128 HOST_WIDE_INT offset;
14129 int write_back = FALSE;
14130 int stm_case;
14131 rtx addr;
14132 bool base_reg_dies;
14134 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14135 mem_order, &base_reg, &offset, true);
14137 if (stm_case == 0)
14138 return false;
14140 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14142 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14143 if (TARGET_THUMB1)
14145 gcc_assert (base_reg_dies);
14146 write_back = TRUE;
14149 if (stm_case == 5)
14151 gcc_assert (base_reg_dies);
14152 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14153 offset = 0;
14156 addr = plus_constant (Pmode, base_reg_rtx, offset);
14158 for (i = 0; i < nops; i++)
14160 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14161 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14162 SImode, addr, 0);
14164 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14165 write_back ? offset + i * 4 : 0));
14166 return true;
14169 /* Called from a peephole2 expander to turn a sequence of stores that are
14170 preceded by constant loads into an STM instruction. OPERANDS are the
14171 operands found by the peephole matcher; NOPS indicates how many
14172 separate stores we are trying to combine; there are 2 * NOPS
14173 instructions in the peephole.
14174 Returns true iff we could generate a new instruction. */
14176 bool
14177 gen_const_stm_seq (rtx *operands, int nops)
14179 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14180 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14181 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14182 rtx mems[MAX_LDM_STM_OPS];
14183 int base_reg;
14184 rtx base_reg_rtx;
14185 HOST_WIDE_INT offset;
14186 int write_back = FALSE;
14187 int stm_case;
14188 rtx addr;
14189 bool base_reg_dies;
14190 int i, j;
14191 HARD_REG_SET allocated;
14193 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14194 mem_order, &base_reg, &offset, false);
14196 if (stm_case == 0)
14197 return false;
14199 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14201 /* If the same register is used more than once, try to find a free
14202 register. */
14203 CLEAR_HARD_REG_SET (allocated);
14204 for (i = 0; i < nops; i++)
14206 for (j = i + 1; j < nops; j++)
14207 if (regs[i] == regs[j])
14209 rtx t = peep2_find_free_register (0, nops * 2,
14210 TARGET_THUMB1 ? "l" : "r",
14211 SImode, &allocated);
14212 if (t == NULL_RTX)
14213 return false;
14214 reg_rtxs[i] = t;
14215 regs[i] = REGNO (t);
14219 /* Compute an ordering that maps the register numbers to an ascending
14220 sequence. */
14221 reg_order[0] = 0;
14222 for (i = 0; i < nops; i++)
14223 if (regs[i] < regs[reg_order[0]])
14224 reg_order[0] = i;
14226 for (i = 1; i < nops; i++)
14228 int this_order = reg_order[i - 1];
14229 for (j = 0; j < nops; j++)
14230 if (regs[j] > regs[reg_order[i - 1]]
14231 && (this_order == reg_order[i - 1]
14232 || regs[j] < regs[this_order]))
14233 this_order = j;
14234 reg_order[i] = this_order;
14237 /* Ensure that registers that must be live after the instruction end
14238 up with the correct value. */
14239 for (i = 0; i < nops; i++)
14241 int this_order = reg_order[i];
14242 if ((this_order != mem_order[i]
14243 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14244 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14245 return false;
14248 /* Load the constants. */
14249 for (i = 0; i < nops; i++)
14251 rtx op = operands[2 * nops + mem_order[i]];
14252 sorted_regs[i] = regs[reg_order[i]];
14253 emit_move_insn (reg_rtxs[reg_order[i]], op);
14256 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14258 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14259 if (TARGET_THUMB1)
14261 gcc_assert (base_reg_dies);
14262 write_back = TRUE;
14265 if (stm_case == 5)
14267 gcc_assert (base_reg_dies);
14268 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14269 offset = 0;
14272 addr = plus_constant (Pmode, base_reg_rtx, offset);
14274 for (i = 0; i < nops; i++)
14276 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14277 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14278 SImode, addr, 0);
14280 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14281 write_back ? offset + i * 4 : 0));
14282 return true;
14285 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14286 unaligned copies on processors which support unaligned semantics for those
14287 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14288 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14289 An interleave factor of 1 (the minimum) will perform no interleaving.
14290 Load/store multiple are used for aligned addresses where possible. */
14292 static void
14293 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14294 HOST_WIDE_INT length,
14295 unsigned int interleave_factor)
14297 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14298 int *regnos = XALLOCAVEC (int, interleave_factor);
14299 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14300 HOST_WIDE_INT i, j;
14301 HOST_WIDE_INT remaining = length, words;
14302 rtx halfword_tmp = NULL, byte_tmp = NULL;
14303 rtx dst, src;
14304 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14305 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14306 HOST_WIDE_INT srcoffset, dstoffset;
14307 HOST_WIDE_INT src_autoinc, dst_autoinc;
14308 rtx mem, addr;
14310 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14312 /* Use hard registers if we have aligned source or destination so we can use
14313 load/store multiple with contiguous registers. */
14314 if (dst_aligned || src_aligned)
14315 for (i = 0; i < interleave_factor; i++)
14316 regs[i] = gen_rtx_REG (SImode, i);
14317 else
14318 for (i = 0; i < interleave_factor; i++)
14319 regs[i] = gen_reg_rtx (SImode);
14321 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14322 src = copy_addr_to_reg (XEXP (srcbase, 0));
14324 srcoffset = dstoffset = 0;
14326 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14327 For copying the last bytes we want to subtract this offset again. */
14328 src_autoinc = dst_autoinc = 0;
14330 for (i = 0; i < interleave_factor; i++)
14331 regnos[i] = i;
14333 /* Copy BLOCK_SIZE_BYTES chunks. */
14335 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14337 /* Load words. */
14338 if (src_aligned && interleave_factor > 1)
14340 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14341 TRUE, srcbase, &srcoffset));
14342 src_autoinc += UNITS_PER_WORD * interleave_factor;
14344 else
14346 for (j = 0; j < interleave_factor; j++)
14348 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14349 - src_autoinc));
14350 mem = adjust_automodify_address (srcbase, SImode, addr,
14351 srcoffset + j * UNITS_PER_WORD);
14352 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14354 srcoffset += block_size_bytes;
14357 /* Store words. */
14358 if (dst_aligned && interleave_factor > 1)
14360 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14361 TRUE, dstbase, &dstoffset));
14362 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14364 else
14366 for (j = 0; j < interleave_factor; j++)
14368 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14369 - dst_autoinc));
14370 mem = adjust_automodify_address (dstbase, SImode, addr,
14371 dstoffset + j * UNITS_PER_WORD);
14372 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14374 dstoffset += block_size_bytes;
14377 remaining -= block_size_bytes;
14380 /* Copy any whole words left (note these aren't interleaved with any
14381 subsequent halfword/byte load/stores in the interests of simplicity). */
14383 words = remaining / UNITS_PER_WORD;
14385 gcc_assert (words < interleave_factor);
14387 if (src_aligned && words > 1)
14389 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14390 &srcoffset));
14391 src_autoinc += UNITS_PER_WORD * words;
14393 else
14395 for (j = 0; j < words; j++)
14397 addr = plus_constant (Pmode, src,
14398 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14399 mem = adjust_automodify_address (srcbase, SImode, addr,
14400 srcoffset + j * UNITS_PER_WORD);
14401 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14403 srcoffset += words * UNITS_PER_WORD;
14406 if (dst_aligned && words > 1)
14408 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14409 &dstoffset));
14410 dst_autoinc += words * UNITS_PER_WORD;
14412 else
14414 for (j = 0; j < words; j++)
14416 addr = plus_constant (Pmode, dst,
14417 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14418 mem = adjust_automodify_address (dstbase, SImode, addr,
14419 dstoffset + j * UNITS_PER_WORD);
14420 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14422 dstoffset += words * UNITS_PER_WORD;
14425 remaining -= words * UNITS_PER_WORD;
14427 gcc_assert (remaining < 4);
14429 /* Copy a halfword if necessary. */
14431 if (remaining >= 2)
14433 halfword_tmp = gen_reg_rtx (SImode);
14435 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14436 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14437 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14439 /* Either write out immediately, or delay until we've loaded the last
14440 byte, depending on interleave factor. */
14441 if (interleave_factor == 1)
14443 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14444 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14445 emit_insn (gen_unaligned_storehi (mem,
14446 gen_lowpart (HImode, halfword_tmp)));
14447 halfword_tmp = NULL;
14448 dstoffset += 2;
14451 remaining -= 2;
14452 srcoffset += 2;
14455 gcc_assert (remaining < 2);
14457 /* Copy last byte. */
14459 if ((remaining & 1) != 0)
14461 byte_tmp = gen_reg_rtx (SImode);
14463 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14464 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14465 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14467 if (interleave_factor == 1)
14469 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14470 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14471 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14472 byte_tmp = NULL;
14473 dstoffset++;
14476 remaining--;
14477 srcoffset++;
14480 /* Store last halfword if we haven't done so already. */
14482 if (halfword_tmp)
14484 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14485 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14486 emit_insn (gen_unaligned_storehi (mem,
14487 gen_lowpart (HImode, halfword_tmp)));
14488 dstoffset += 2;
14491 /* Likewise for last byte. */
14493 if (byte_tmp)
14495 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14496 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14497 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14498 dstoffset++;
14501 gcc_assert (remaining == 0 && srcoffset == dstoffset);
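/* As a sketch of the flow above (hypothetical sizes): a 23-byte copy with
   INTERLEAVE_FACTOR == 4 and both buffers known to be word-aligned does one
   16-byte ldm/stm block, then one whole word with single loads/stores, then
   a halfword and finally a single byte, ending with REMAINING == 0 and
   SRCOFFSET == DSTOFFSET == 23.  */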
14504 /* From mips_adjust_block_mem:
14506 Helper function for doing a loop-based block operation on memory
14507 reference MEM. Each iteration of the loop will operate on LENGTH
14508 bytes of MEM.
14510 Create a new base register for use within the loop and point it to
14511 the start of MEM. Create a new memory reference that uses this
14512 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14514 static void
14515 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14516 rtx *loop_mem)
14518 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14520 /* Although the new mem does not refer to a known location,
14521 it does keep up to LENGTH bytes of alignment. */
14522 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14523 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14526 /* From mips_block_move_loop:
14528 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14529 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14530 the memory regions do not overlap. */
14532 static void
14533 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14534 unsigned int interleave_factor,
14535 HOST_WIDE_INT bytes_per_iter)
14537 rtx src_reg, dest_reg, final_src, test;
14538 HOST_WIDE_INT leftover;
14540 leftover = length % bytes_per_iter;
14541 length -= leftover;
14543 /* Create registers and memory references for use within the loop. */
14544 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14545 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14547 /* Calculate the value that SRC_REG should have after the last iteration of
14548 the loop. */
14549 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14550 0, 0, OPTAB_WIDEN);
14552 /* Emit the start of the loop. */
14553 rtx_code_label *label = gen_label_rtx ();
14554 emit_label (label);
14556 /* Emit the loop body. */
14557 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14558 interleave_factor);
14560 /* Move on to the next block. */
14561 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14562 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14564 /* Emit the loop condition. */
14565 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14566 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14568 /* Mop up any left-over bytes. */
14569 if (leftover)
14570 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14573 /* Emit a block move when either the source or destination is unaligned (not
14574 aligned to a four-byte boundary). This may need further tuning depending on
14575 core type, optimize_size setting, etc. */
14577 static int
14578 arm_movmemqi_unaligned (rtx *operands)
14580 HOST_WIDE_INT length = INTVAL (operands[2]);
14582 if (optimize_size)
14584 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14585 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14586 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14587 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14588 or dst_aligned though: allow more interleaving in those cases since the
14589 resulting code can be smaller. */
14590 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14591 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14593 if (length > 12)
14594 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14595 interleave_factor, bytes_per_iter);
14596 else
14597 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14598 interleave_factor);
14600 else
14602 /* Note that the loop created by arm_block_move_unaligned_loop may be
14603 subject to loop unrolling, which makes tuning this condition a little
14604 redundant. */
14605 if (length > 32)
14606 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14607 else
14608 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14611 return 1;
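/* For example (sizes chosen for illustration), a 20-byte copy compiled with
   -Os where neither buffer is known to be word-aligned uses the loop with
   INTERLEAVE_FACTOR == 1 and BYTES_PER_ITER == 4, i.e. a four-byte-per-
   iteration ldr/str loop with no straight-line tail, since 20 is a multiple
   of 4.  */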
int
14615 arm_gen_movmemqi (rtx *operands)
14617 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14618 HOST_WIDE_INT srcoffset, dstoffset;
14619 int i;
14620 rtx src, dst, srcbase, dstbase;
14621 rtx part_bytes_reg = NULL;
14622 rtx mem;
14624 if (!CONST_INT_P (operands[2])
14625 || !CONST_INT_P (operands[3])
14626 || INTVAL (operands[2]) > 64)
14627 return 0;
14629 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14630 return arm_movmemqi_unaligned (operands);
14632 if (INTVAL (operands[3]) & 3)
14633 return 0;
14635 dstbase = operands[0];
14636 srcbase = operands[1];
14638 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14639 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14641 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14642 out_words_to_go = INTVAL (operands[2]) / 4;
14643 last_bytes = INTVAL (operands[2]) & 3;
14644 dstoffset = srcoffset = 0;
14646 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14647 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14649 for (i = 0; in_words_to_go >= 2; i+=4)
14651 if (in_words_to_go > 4)
14652 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14653 TRUE, srcbase, &srcoffset));
14654 else
14655 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14656 src, FALSE, srcbase,
14657 &srcoffset));
14659 if (out_words_to_go)
14661 if (out_words_to_go > 4)
14662 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14663 TRUE, dstbase, &dstoffset));
14664 else if (out_words_to_go != 1)
14665 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14666 out_words_to_go, dst,
14667 (last_bytes == 0
14668 ? FALSE : TRUE),
14669 dstbase, &dstoffset));
14670 else
14672 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14673 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14674 if (last_bytes != 0)
14676 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14677 dstoffset += 4;
14682 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14683 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14686 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14687 if (out_words_to_go)
14689 rtx sreg;
14691 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14692 sreg = copy_to_reg (mem);
14694 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14695 emit_move_insn (mem, sreg);
14696 in_words_to_go--;
14698 gcc_assert (!in_words_to_go); /* Sanity check */
14701 if (in_words_to_go)
14703 gcc_assert (in_words_to_go > 0);
14705 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14706 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14709 gcc_assert (!last_bytes || part_bytes_reg);
14711 if (BYTES_BIG_ENDIAN && last_bytes)
14713 rtx tmp = gen_reg_rtx (SImode);
14715 /* The bytes we want are in the top end of the word. */
14716 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14717 GEN_INT (8 * (4 - last_bytes))));
14718 part_bytes_reg = tmp;
14720 while (last_bytes)
14722 mem = adjust_automodify_address (dstbase, QImode,
14723 plus_constant (Pmode, dst,
14724 last_bytes - 1),
14725 dstoffset + last_bytes - 1);
14726 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14728 if (--last_bytes)
14730 tmp = gen_reg_rtx (SImode);
14731 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14732 part_bytes_reg = tmp;
14737 else
14739 if (last_bytes > 1)
14741 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14742 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14743 last_bytes -= 2;
14744 if (last_bytes)
14746 rtx tmp = gen_reg_rtx (SImode);
14747 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14748 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14749 part_bytes_reg = tmp;
14750 dstoffset += 2;
14754 if (last_bytes)
14756 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14757 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14761 return 1;
14764 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14765 by mode size. */
14766 inline static rtx
14767 next_consecutive_mem (rtx mem)
14769 machine_mode mode = GET_MODE (mem);
14770 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14771 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14773 return adjust_automodify_address (mem, mode, addr, offset);
14776 /* Copy using LDRD/STRD instructions whenever possible.
14777 Returns true upon success. */
14778 bool
14779 gen_movmem_ldrd_strd (rtx *operands)
14781 unsigned HOST_WIDE_INT len;
14782 HOST_WIDE_INT align;
14783 rtx src, dst, base;
14784 rtx reg0;
14785 bool src_aligned, dst_aligned;
14786 bool src_volatile, dst_volatile;
14788 gcc_assert (CONST_INT_P (operands[2]));
14789 gcc_assert (CONST_INT_P (operands[3]));
14791 len = UINTVAL (operands[2]);
14792 if (len > 64)
14793 return false;
14795 /* Maximum alignment we can assume for both src and dst buffers. */
14796 align = INTVAL (operands[3]);
14798 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14799 return false;
14801 /* Place src and dst addresses in registers
14802 and update the corresponding mem rtx. */
14803 dst = operands[0];
14804 dst_volatile = MEM_VOLATILE_P (dst);
14805 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14806 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14807 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14809 src = operands[1];
14810 src_volatile = MEM_VOLATILE_P (src);
14811 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14812 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14813 src = adjust_automodify_address (src, VOIDmode, base, 0);
14815 if (!unaligned_access && !(src_aligned && dst_aligned))
14816 return false;
14818 if (src_volatile || dst_volatile)
14819 return false;
14821 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14822 if (!(dst_aligned || src_aligned))
14823 return arm_gen_movmemqi (operands);
14825 src = adjust_address (src, DImode, 0);
14826 dst = adjust_address (dst, DImode, 0);
14827 while (len >= 8)
14829 len -= 8;
14830 reg0 = gen_reg_rtx (DImode);
14831 if (src_aligned)
14832 emit_move_insn (reg0, src);
14833 else
14834 emit_insn (gen_unaligned_loaddi (reg0, src));
14836 if (dst_aligned)
14837 emit_move_insn (dst, reg0);
14838 else
14839 emit_insn (gen_unaligned_storedi (dst, reg0));
14841 src = next_consecutive_mem (src);
14842 dst = next_consecutive_mem (dst);
14845 gcc_assert (len < 8);
14846 if (len >= 4)
14848 /* At least a word but less than a double-word left to copy. Copy a word. */
14849 reg0 = gen_reg_rtx (SImode);
14850 src = adjust_address (src, SImode, 0);
14851 dst = adjust_address (dst, SImode, 0);
14852 if (src_aligned)
14853 emit_move_insn (reg0, src);
14854 else
14855 emit_insn (gen_unaligned_loadsi (reg0, src));
14857 if (dst_aligned)
14858 emit_move_insn (dst, reg0);
14859 else
14860 emit_insn (gen_unaligned_storesi (dst, reg0));
14862 src = next_consecutive_mem (src);
14863 dst = next_consecutive_mem (dst);
14864 len -= 4;
14867 if (len == 0)
14868 return true;
14870 /* Copy the remaining bytes. */
14871 if (len >= 2)
14873 dst = adjust_address (dst, HImode, 0);
14874 src = adjust_address (src, HImode, 0);
14875 reg0 = gen_reg_rtx (SImode);
14876 if (src_aligned)
14877 emit_insn (gen_zero_extendhisi2 (reg0, src));
14878 else
14879 emit_insn (gen_unaligned_loadhiu (reg0, src));
14881 if (dst_aligned)
14882 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14883 else
14884 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14886 src = next_consecutive_mem (src);
14887 dst = next_consecutive_mem (dst);
14888 if (len == 2)
14889 return true;
14892 dst = adjust_address (dst, QImode, 0);
14893 src = adjust_address (src, QImode, 0);
14894 reg0 = gen_reg_rtx (QImode);
14895 emit_move_insn (reg0, src);
14896 emit_move_insn (dst, reg0);
14897 return true;
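/* A worked example (hypothetical operands): a 15-byte copy with both buffers
   known to be word-aligned is expanded as one DImode move (letting the
   backend use LDRD/STRD), one SImode move, one halfword store and one final
   byte copy, i.e. 8 + 4 + 2 + 1 bytes.  */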
14900 /* Select a dominance comparison mode if possible for a test of the general
14901 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14902 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14903 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14904 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14905 In all cases OP will be either EQ or NE, but we don't need to know which
14906 here. If we are unable to support a dominance comparison we return
14907 CCmode. This will then fail to match for the RTL expressions that
14908 generate this call. */
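/* For instance (an illustrative pair of operands): X == (eq r0 r1) and
   Y == (geu r2 r3) with COND_OR == DOM_CC_X_OR_Y give cond1 == EQ and
   cond2 == GEU; EQ dominates GEU, so the EQ arm of the switch below returns
   CC_DGEUmode.  */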
14909 machine_mode
14910 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14912 enum rtx_code cond1, cond2;
14913 int swapped = 0;
14915 /* Currently we will probably get the wrong result if the individual
14916 comparisons are not simple. This also ensures that it is safe to
14917 reverse a comparison if necessary. */
14918 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14919 != CCmode)
14920 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14921 != CCmode))
14922 return CCmode;
14924 /* The if_then_else variant of this tests the second condition if the
14925 first passes, but is true if the first fails. Reverse the first
14926 condition to get a true "inclusive-or" expression. */
14927 if (cond_or == DOM_CC_NX_OR_Y)
14928 cond1 = reverse_condition (cond1);
14930 /* If the comparisons are not equal, and one doesn't dominate the other,
14931 then we can't do this. */
14932 if (cond1 != cond2
14933 && !comparison_dominates_p (cond1, cond2)
14934 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14935 return CCmode;
14937 if (swapped)
14939 enum rtx_code temp = cond1;
14940 cond1 = cond2;
14941 cond2 = temp;
14944 switch (cond1)
14946 case EQ:
14947 if (cond_or == DOM_CC_X_AND_Y)
14948 return CC_DEQmode;
14950 switch (cond2)
14952 case EQ: return CC_DEQmode;
14953 case LE: return CC_DLEmode;
14954 case LEU: return CC_DLEUmode;
14955 case GE: return CC_DGEmode;
14956 case GEU: return CC_DGEUmode;
14957 default: gcc_unreachable ();
14960 case LT:
14961 if (cond_or == DOM_CC_X_AND_Y)
14962 return CC_DLTmode;
14964 switch (cond2)
14966 case LT:
14967 return CC_DLTmode;
14968 case LE:
14969 return CC_DLEmode;
14970 case NE:
14971 return CC_DNEmode;
14972 default:
14973 gcc_unreachable ();
14976 case GT:
14977 if (cond_or == DOM_CC_X_AND_Y)
14978 return CC_DGTmode;
14980 switch (cond2)
14982 case GT:
14983 return CC_DGTmode;
14984 case GE:
14985 return CC_DGEmode;
14986 case NE:
14987 return CC_DNEmode;
14988 default:
14989 gcc_unreachable ();
14992 case LTU:
14993 if (cond_or == DOM_CC_X_AND_Y)
14994 return CC_DLTUmode;
14996 switch (cond2)
14998 case LTU:
14999 return CC_DLTUmode;
15000 case LEU:
15001 return CC_DLEUmode;
15002 case NE:
15003 return CC_DNEmode;
15004 default:
15005 gcc_unreachable ();
15008 case GTU:
15009 if (cond_or == DOM_CC_X_AND_Y)
15010 return CC_DGTUmode;
15012 switch (cond2)
15014 case GTU:
15015 return CC_DGTUmode;
15016 case GEU:
15017 return CC_DGEUmode;
15018 case NE:
15019 return CC_DNEmode;
15020 default:
15021 gcc_unreachable ();
15024 /* The remaining cases only occur when both comparisons are the
15025 same. */
15026 case NE:
15027 gcc_assert (cond1 == cond2);
15028 return CC_DNEmode;
15030 case LE:
15031 gcc_assert (cond1 == cond2);
15032 return CC_DLEmode;
15034 case GE:
15035 gcc_assert (cond1 == cond2);
15036 return CC_DGEmode;
15038 case LEU:
15039 gcc_assert (cond1 == cond2);
15040 return CC_DLEUmode;
15042 case GEU:
15043 gcc_assert (cond1 == cond2);
15044 return CC_DGEUmode;
15046 default:
15047 gcc_unreachable ();
15051 machine_mode
15052 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15054 /* All floating point compares return CCFP if it is an equality
15055 comparison, and CCFPE otherwise. */
15056 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15058 switch (op)
15060 case EQ:
15061 case NE:
15062 case UNORDERED:
15063 case ORDERED:
15064 case UNLT:
15065 case UNLE:
15066 case UNGT:
15067 case UNGE:
15068 case UNEQ:
15069 case LTGT:
15070 return CCFPmode;
15072 case LT:
15073 case LE:
15074 case GT:
15075 case GE:
15076 return CCFPEmode;
15078 default:
15079 gcc_unreachable ();
15083 /* A compare with a shifted operand. Because of canonicalization, the
15084 comparison will have to be swapped when we emit the assembler. */
15085 if (GET_MODE (y) == SImode
15086 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15087 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15088 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15089 || GET_CODE (x) == ROTATERT))
15090 return CC_SWPmode;
15092 /* This operation is performed swapped, but since we only rely on the Z
15093 flag we don't need an additional mode. */
15094 if (GET_MODE (y) == SImode
15095 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15096 && GET_CODE (x) == NEG
15097 && (op == EQ || op == NE))
15098 return CC_Zmode;
15100 /* This is a special case that is used by combine to allow a
15101 comparison of a shifted byte load to be split into a zero-extend
15102 followed by a comparison of the shifted integer (only valid for
15103 equalities and unsigned inequalities). */
15104 if (GET_MODE (x) == SImode
15105 && GET_CODE (x) == ASHIFT
15106 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15107 && GET_CODE (XEXP (x, 0)) == SUBREG
15108 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15109 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15110 && (op == EQ || op == NE
15111 || op == GEU || op == GTU || op == LTU || op == LEU)
15112 && CONST_INT_P (y))
15113 return CC_Zmode;
15115 /* A construct for a conditional compare, if the false arm contains
15116 0, then both conditions must be true, otherwise either condition
15117 must be true. Not all conditions are possible, so CCmode is
15118 returned if it can't be done. */
15119 if (GET_CODE (x) == IF_THEN_ELSE
15120 && (XEXP (x, 2) == const0_rtx
15121 || XEXP (x, 2) == const1_rtx)
15122 && COMPARISON_P (XEXP (x, 0))
15123 && COMPARISON_P (XEXP (x, 1)))
15124 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15125 INTVAL (XEXP (x, 2)));
15127 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15128 if (GET_CODE (x) == AND
15129 && (op == EQ || op == NE)
15130 && COMPARISON_P (XEXP (x, 0))
15131 && COMPARISON_P (XEXP (x, 1)))
15132 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15133 DOM_CC_X_AND_Y);
15135 if (GET_CODE (x) == IOR
15136 && (op == EQ || op == NE)
15137 && COMPARISON_P (XEXP (x, 0))
15138 && COMPARISON_P (XEXP (x, 1)))
15139 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15140 DOM_CC_X_OR_Y);
15142 /* An operation (on Thumb) where we want to test for a single bit.
15143 This is done by shifting that bit up into the top bit of a
15144 scratch register; we can then branch on the sign bit. */
15145 if (TARGET_THUMB1
15146 && GET_MODE (x) == SImode
15147 && (op == EQ || op == NE)
15148 && GET_CODE (x) == ZERO_EXTRACT
15149 && XEXP (x, 1) == const1_rtx)
15150 return CC_Nmode;
15152 /* For an operation that sets the condition codes as a side-effect, the
15153 V flag is not set correctly, so we can only use comparisons where
15154 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15155 instead.) */
15156 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15157 if (GET_MODE (x) == SImode
15158 && y == const0_rtx
15159 && (op == EQ || op == NE || op == LT || op == GE)
15160 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15161 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15162 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15163 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15164 || GET_CODE (x) == LSHIFTRT
15165 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15166 || GET_CODE (x) == ROTATERT
15167 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15168 return CC_NOOVmode;
15170 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15171 return CC_Zmode;
15173 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15174 && GET_CODE (x) == PLUS
15175 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15176 return CC_Cmode;
15178 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15180 switch (op)
15182 case EQ:
15183 case NE:
15184 /* A DImode comparison against zero can be implemented by
15185 or'ing the two halves together. */
15186 if (y == const0_rtx)
15187 return CC_Zmode;
15189 /* We can do an equality test in three Thumb instructions. */
15190 if (!TARGET_32BIT)
15191 return CC_Zmode;
15193 /* FALLTHROUGH */
15195 case LTU:
15196 case LEU:
15197 case GTU:
15198 case GEU:
15199 /* DImode unsigned comparisons can be implemented by cmp +
15200 cmpeq without a scratch register. Not worth doing in
15201 Thumb-2. */
15202 if (TARGET_32BIT)
15203 return CC_CZmode;
15205 /* FALLTHROUGH */
15207 case LT:
15208 case LE:
15209 case GT:
15210 case GE:
15211 /* DImode signed and unsigned comparisons can be implemented
15212 by cmp + sbcs with a scratch register, but that does not
15213 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15214 gcc_assert (op != EQ && op != NE);
15215 return CC_NCVmode;
15217 default:
15218 gcc_unreachable ();
15222 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15223 return GET_MODE (x);
15225 return CCmode;
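/* Two quick examples of the selection above (hypothetical operands):
   comparing (ashift:SI r1 (const_int 2)) against a plain register yields
   CC_SWPmode, because the operands must be swapped when the assembler is
   emitted, while (ltu:SI (plus:SI r0 r1) r1) yields CC_Cmode, where only the
   carry flag is relied upon.  */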
15228 /* X and Y are two things to compare using CODE. Emit the compare insn and
15229 return the rtx for register 0 in the proper mode. FP means this is a
15230 floating point compare: I don't think that it is needed on the arm. */
rtx
15232 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15234 machine_mode mode;
15235 rtx cc_reg;
15236 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15238 /* We might have X as a constant, Y as a register because of the predicates
15239 used for cmpdi. If so, force X to a register here. */
15240 if (dimode_comparison && !REG_P (x))
15241 x = force_reg (DImode, x);
15243 mode = SELECT_CC_MODE (code, x, y);
15244 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15246 if (dimode_comparison
15247 && mode != CC_CZmode)
15249 rtx clobber, set;
15251 /* To compare two non-zero values for equality, XOR them and
15252 then compare against zero. Not used for ARM mode; there
15253 CC_CZmode is cheaper. */
15254 if (mode == CC_Zmode && y != const0_rtx)
15256 gcc_assert (!reload_completed);
15257 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15258 y = const0_rtx;
15261 /* A scratch register is required. */
15262 if (reload_completed)
15263 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15264 else
15265 scratch = gen_rtx_SCRATCH (SImode);
15267 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15268 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15269 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15271 else
15272 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15274 return cc_reg;
15277 /* Generate a sequence of insns that will generate the correct return
15278 address mask depending on the physical architecture that the program
15279 is running on. */
rtx
15281 arm_gen_return_addr_mask (void)
15283 rtx reg = gen_reg_rtx (Pmode);
15285 emit_insn (gen_return_addr_mask (reg));
15286 return reg;
15289 void
15290 arm_reload_in_hi (rtx *operands)
15292 rtx ref = operands[1];
15293 rtx base, scratch;
15294 HOST_WIDE_INT offset = 0;
15296 if (GET_CODE (ref) == SUBREG)
15298 offset = SUBREG_BYTE (ref);
15299 ref = SUBREG_REG (ref);
15302 if (REG_P (ref))
15304 /* We have a pseudo which has been spilt onto the stack; there
15305 are two cases here: the first where there is a simple
15306 stack-slot replacement and a second where the stack-slot is
15307 out of range, or is used as a subreg. */
15308 if (reg_equiv_mem (REGNO (ref)))
15310 ref = reg_equiv_mem (REGNO (ref));
15311 base = find_replacement (&XEXP (ref, 0));
15313 else
15314 /* The slot is out of range, or was dressed up in a SUBREG. */
15315 base = reg_equiv_address (REGNO (ref));
15317 else
15318 base = find_replacement (&XEXP (ref, 0));
15320 /* Handle the case where the address is too complex to be offset by 1. */
15321 if (GET_CODE (base) == MINUS
15322 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15324 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15326 emit_set_insn (base_plus, base);
15327 base = base_plus;
15329 else if (GET_CODE (base) == PLUS)
15331 /* The addend must be CONST_INT, or we would have dealt with it above. */
15332 HOST_WIDE_INT hi, lo;
15334 offset += INTVAL (XEXP (base, 1));
15335 base = XEXP (base, 0);
15337 /* Rework the address into a legal sequence of insns. */
15338 /* Valid range for lo is -4095 -> 4095 */
15339 lo = (offset >= 0
15340 ? (offset & 0xfff)
15341 : -((-offset) & 0xfff));
15343 /* Corner case, if lo is the max offset then we would be out of range
15344 once we have added the additional 1 below, so bump the msb into the
15345 pre-loading insn(s). */
15346 if (lo == 4095)
15347 lo &= 0x7ff;
15349 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15350 ^ (HOST_WIDE_INT) 0x80000000)
15351 - (HOST_WIDE_INT) 0x80000000);
15353 gcc_assert (hi + lo == offset);
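/* For example, an offset of 0x1234 splits into lo == 0x234 and hi == 0x1000,
   while the corner case offset 4095 becomes lo == 0x7ff and hi == 0x800, so
   that the offset + 1 access below still fits in the 4095 range.  */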
15355 if (hi != 0)
15357 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15359 /* Get the base address; addsi3 knows how to handle constants
15360 that require more than one insn. */
15361 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15362 base = base_plus;
15363 offset = lo;
15367 /* Operands[2] may overlap operands[0] (though it won't overlap
15368 operands[1]), that's why we asked for a DImode reg -- so we can
15369 use the bit that does not overlap. */
15370 if (REGNO (operands[2]) == REGNO (operands[0]))
15371 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15372 else
15373 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15375 emit_insn (gen_zero_extendqisi2 (scratch,
15376 gen_rtx_MEM (QImode,
15377 plus_constant (Pmode, base,
15378 offset))));
15379 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15380 gen_rtx_MEM (QImode,
15381 plus_constant (Pmode, base,
15382 offset + 1))));
15383 if (!BYTES_BIG_ENDIAN)
15384 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15385 gen_rtx_IOR (SImode,
15386 gen_rtx_ASHIFT
15387 (SImode,
15388 gen_rtx_SUBREG (SImode, operands[0], 0),
15389 GEN_INT (8)),
15390 scratch));
15391 else
15392 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15393 gen_rtx_IOR (SImode,
15394 gen_rtx_ASHIFT (SImode, scratch,
15395 GEN_INT (8)),
15396 gen_rtx_SUBREG (SImode, operands[0], 0)));
15399 /* Handle storing a half-word to memory during reload by synthesizing as two
15400 byte stores. Take care not to clobber the input values until after we
15401 have moved them somewhere safe. This code assumes that if the DImode
15402 scratch in operands[2] overlaps either the input value or output address
15403 in some way, then that value must die in this insn (we absolutely need
15404 two scratch registers for some corner cases). */
15405 void
15406 arm_reload_out_hi (rtx *operands)
15408 rtx ref = operands[0];
15409 rtx outval = operands[1];
15410 rtx base, scratch;
15411 HOST_WIDE_INT offset = 0;
15413 if (GET_CODE (ref) == SUBREG)
15415 offset = SUBREG_BYTE (ref);
15416 ref = SUBREG_REG (ref);
15419 if (REG_P (ref))
15421 /* We have a pseudo which has been spilt onto the stack; there
15422 are two cases here: the first where there is a simple
15423 stack-slot replacement and a second where the stack-slot is
15424 out of range, or is used as a subreg. */
15425 if (reg_equiv_mem (REGNO (ref)))
15427 ref = reg_equiv_mem (REGNO (ref));
15428 base = find_replacement (&XEXP (ref, 0));
15430 else
15431 /* The slot is out of range, or was dressed up in a SUBREG. */
15432 base = reg_equiv_address (REGNO (ref));
15434 else
15435 base = find_replacement (&XEXP (ref, 0));
15437 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15439 /* Handle the case where the address is too complex to be offset by 1. */
15440 if (GET_CODE (base) == MINUS
15441 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15443 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15445 /* Be careful not to destroy OUTVAL. */
15446 if (reg_overlap_mentioned_p (base_plus, outval))
15448 /* Updating base_plus might destroy outval, see if we can
15449 swap the scratch and base_plus. */
15450 if (!reg_overlap_mentioned_p (scratch, outval))
15452 rtx tmp = scratch;
15453 scratch = base_plus;
15454 base_plus = tmp;
15456 else
15458 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15460 /* Be conservative and copy OUTVAL into the scratch now,
15461 this should only be necessary if outval is a subreg
15462 of something larger than a word. */
15463 /* XXX Might this clobber base? I can't see how it can,
15464 since scratch is known to overlap with OUTVAL, and
15465 must be wider than a word. */
15466 emit_insn (gen_movhi (scratch_hi, outval));
15467 outval = scratch_hi;
15471 emit_set_insn (base_plus, base);
15472 base = base_plus;
15474 else if (GET_CODE (base) == PLUS)
15476 /* The addend must be CONST_INT, or we would have dealt with it above. */
15477 HOST_WIDE_INT hi, lo;
15479 offset += INTVAL (XEXP (base, 1));
15480 base = XEXP (base, 0);
15482 /* Rework the address into a legal sequence of insns. */
15483 /* Valid range for lo is -4095 -> 4095 */
15484 lo = (offset >= 0
15485 ? (offset & 0xfff)
15486 : -((-offset) & 0xfff));
15488 /* Corner case, if lo is the max offset then we would be out of range
15489 once we have added the additional 1 below, so bump the msb into the
15490 pre-loading insn(s). */
15491 if (lo == 4095)
15492 lo &= 0x7ff;
15494 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15495 ^ (HOST_WIDE_INT) 0x80000000)
15496 - (HOST_WIDE_INT) 0x80000000);
15498 gcc_assert (hi + lo == offset);
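/* Worked example (illustrative): offset == 4095 gives lo == 0xfff, which
   the corner case above trims to 0x7ff (2047); the sign-extension
   arithmetic then yields hi == 2048, and hi + lo == 4095 as asserted.  */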
15500 if (hi != 0)
15502 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15504 /* Be careful not to destroy OUTVAL. */
15505 if (reg_overlap_mentioned_p (base_plus, outval))
15507 /* Updating base_plus might destroy outval, see if we
15508 can swap the scratch and base_plus. */
15509 if (!reg_overlap_mentioned_p (scratch, outval))
15511 rtx tmp = scratch;
15512 scratch = base_plus;
15513 base_plus = tmp;
15515 else
15517 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15519 /* Be conservative and copy outval into scratch now,
15520 this should only be necessary if outval is a
15521 subreg of something larger than a word. */
15522 /* XXX Might this clobber base? I can't see how it
15523 can, since scratch is known to overlap with
15524 outval. */
15525 emit_insn (gen_movhi (scratch_hi, outval));
15526 outval = scratch_hi;
15530 /* Get the base address; addsi3 knows how to handle constants
15531 that require more than one insn. */
15532 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15533 base = base_plus;
15534 offset = lo;
15538 if (BYTES_BIG_ENDIAN)
15540 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15541 plus_constant (Pmode, base,
15542 offset + 1)),
15543 gen_lowpart (QImode, outval)));
15544 emit_insn (gen_lshrsi3 (scratch,
15545 gen_rtx_SUBREG (SImode, outval, 0),
15546 GEN_INT (8)));
15547 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15548 offset)),
15549 gen_lowpart (QImode, scratch)));
15551 else
15553 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15554 offset)),
15555 gen_lowpart (QImode, outval)));
15556 emit_insn (gen_lshrsi3 (scratch,
15557 gen_rtx_SUBREG (SImode, outval, 0),
15558 GEN_INT (8)));
15559 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15560 plus_constant (Pmode, base,
15561 offset + 1)),
15562 gen_lowpart (QImode, scratch)));
15566 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15567 (padded to the size of a word) should be passed in a register. */
15569 static bool
15570 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15572 if (TARGET_AAPCS_BASED)
15573 return must_pass_in_stack_var_size (mode, type);
15574 else
15575 return must_pass_in_stack_var_size_or_pad (mode, type);
15579 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15580 Return true if an argument passed on the stack should be padded upwards,
15581 i.e. if the least-significant byte has useful data.
15582 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15583 aggregate types are placed in the lowest memory address. */
15585 bool
15586 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15588 if (!TARGET_AAPCS_BASED)
15589 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15591 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15592 return false;
15594 return true;
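/* Example (illustrative): on an AAPCS big-endian target this returns false
   for an 'int' argument but true for a small structure, matching the rule
   above that small aggregates go at the lowest address of their slot.  */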
15598 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15599 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15600 register has useful data, and return the opposite if the most
15601 significant byte does. */
15603 bool
15604 arm_pad_reg_upward (machine_mode mode,
15605 tree type, int first ATTRIBUTE_UNUSED)
15607 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15609 /* For AAPCS, small aggregates, small fixed-point types,
15610 and small complex types are always padded upwards. */
15611 if (type)
15613 if ((AGGREGATE_TYPE_P (type)
15614 || TREE_CODE (type) == COMPLEX_TYPE
15615 || FIXED_POINT_TYPE_P (type))
15616 && int_size_in_bytes (type) <= 4)
15617 return true;
15619 else
15621 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15622 && GET_MODE_SIZE (mode) <= 4)
15623 return true;
15627 /* Otherwise, use default padding. */
15628 return !BYTES_BIG_ENDIAN;
15631 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15632 assuming that the address in the base register is word aligned. */
15633 bool
15634 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15636 HOST_WIDE_INT max_offset;
15638 /* Offset must be a multiple of 4 in Thumb mode. */
15639 if (TARGET_THUMB2 && ((offset & 3) != 0))
15640 return false;
15642 if (TARGET_THUMB2)
15643 max_offset = 1020;
15644 else if (TARGET_ARM)
15645 max_offset = 255;
15646 else
15647 return false;
15649 return ((offset <= max_offset) && (offset >= -max_offset));
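/* Example (illustrative): offset_ok_for_ldrd_strd (256) is false in ARM
   state, where the limit is 255, but true in Thumb-2 state, which accepts
   offsets up to 1020 provided they are a multiple of 4.  */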
15652 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15653 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15654 Assumes that the address in the base register RN is word aligned. Pattern
15655 guarantees that both memory accesses use the same base register,
15656 the offsets are constants within the range, and the gap between the offsets is 4.
15657 If reload is complete then check that the registers are legal. WBACK indicates whether
15658 address is updated. LOAD indicates whether memory access is load or store. */
15659 bool
15660 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15661 bool wback, bool load)
15663 unsigned int t, t2, n;
15665 if (!reload_completed)
15666 return true;
15668 if (!offset_ok_for_ldrd_strd (offset))
15669 return false;
15671 t = REGNO (rt);
15672 t2 = REGNO (rt2);
15673 n = REGNO (rn);
15675 if ((TARGET_THUMB2)
15676 && ((wback && (n == t || n == t2))
15677 || (t == SP_REGNUM)
15678 || (t == PC_REGNUM)
15679 || (t2 == SP_REGNUM)
15680 || (t2 == PC_REGNUM)
15681 || (!load && (n == PC_REGNUM))
15682 || (load && (t == t2))
15683 /* Triggers Cortex-M3 LDRD errata. */
15684 || (!wback && load && fix_cm3_ldrd && (n == t))))
15685 return false;
15687 if ((TARGET_ARM)
15688 && ((wback && (n == t || n == t2))
15689 || (t2 == PC_REGNUM)
15690 || (t % 2 != 0) /* First destination register is not even. */
15691 || (t2 != t + 1)
15692 /* PC can be used as base register (for offset addressing only),
15693 but it is deprecated. */
15694 || (n == PC_REGNUM)))
15695 return false;
15697 return true;
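/* Example (illustrative, after reload): in ARM state the pair r0/r1 with a
   distinct base register is acceptable for LDRD, whereas r1/r2 is rejected
   because the first destination register must be even and the second must
   be the next consecutive register.  */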
15700 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15701 operand MEM's address contains an immediate offset from the base
15702 register and has no side effects, in which case it sets BASE and
15703 OFFSET accordingly. */
15704 static bool
15705 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15707 rtx addr;
15709 gcc_assert (base != NULL && offset != NULL);
15711 /* TODO: Handle more general memory operand patterns, such as
15712 PRE_DEC and PRE_INC. */
15714 if (side_effects_p (mem))
15715 return false;
15717 /* Can't deal with subregs. */
15718 if (GET_CODE (mem) == SUBREG)
15719 return false;
15721 gcc_assert (MEM_P (mem));
15723 *offset = const0_rtx;
15725 addr = XEXP (mem, 0);
15727 /* If addr isn't valid for DImode, then we can't handle it. */
15728 if (!arm_legitimate_address_p (DImode, addr,
15729 reload_in_progress || reload_completed))
15730 return false;
15732 if (REG_P (addr))
15734 *base = addr;
15735 return true;
15737 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15739 *base = XEXP (addr, 0);
15740 *offset = XEXP (addr, 1);
15741 return (REG_P (*base) && CONST_INT_P (*offset));
15744 return false;
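/* Example (illustrative): for (mem:SI (plus (reg r4) (const_int 8))) this
   sets *BASE to r4 and *OFFSET to 8 and returns true, while a PRE_INC
   address is rejected up front because it has side effects.  */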
15747 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15749 /* Called from a peephole2 to replace two word-size accesses with a
15750 single LDRD/STRD instruction. Returns true iff we can generate a
15751 new instruction sequence. That is, both accesses use the same base
15752 register and the gap between constant offsets is 4. This function
15753 may reorder its operands to match ldrd/strd RTL templates.
15754 OPERANDS are the operands found by the peephole matcher;
15755 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15756 corresponding memory operands. LOAD indicates whether the access
15757 is load or store. CONST_STORE indicates a store of constant
15758 integer values held in OPERANDS[4,5], and assumes that the pattern
15759 is 4 insns long, for the purpose of checking dead registers.
15760 COMMUTE indicates that register operands may be reordered. */
15761 bool
15762 gen_operands_ldrd_strd (rtx *operands, bool load,
15763 bool const_store, bool commute)
15765 int nops = 2;
15766 HOST_WIDE_INT offsets[2], offset;
15767 rtx base = NULL_RTX;
15768 rtx cur_base, cur_offset, tmp;
15769 int i, gap;
15770 HARD_REG_SET regset;
15772 gcc_assert (!const_store || !load);
15773 /* Check that the memory references are immediate offsets from the
15774 same base register. Extract the base register, the destination
15775 registers, and the corresponding memory offsets. */
15776 for (i = 0; i < nops; i++)
15778 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15779 return false;
15781 if (i == 0)
15782 base = cur_base;
15783 else if (REGNO (base) != REGNO (cur_base))
15784 return false;
15786 offsets[i] = INTVAL (cur_offset);
15787 if (GET_CODE (operands[i]) == SUBREG)
15789 tmp = SUBREG_REG (operands[i]);
15790 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15791 operands[i] = tmp;
15795 /* Make sure there is no dependency between the individual loads. */
15796 if (load && REGNO (operands[0]) == REGNO (base))
15797 return false; /* RAW */
15799 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15800 return false; /* WAW */
15802 /* If the same input register is used in both stores
15803 when storing different constants, try to find a free register.
15804 For example, the code
15805 mov r0, 0
15806 str r0, [r2]
15807 mov r0, 1
15808 str r0, [r2, #4]
15809 can be transformed into
15810 mov r1, 0
15811 strd r1, r0, [r2]
15812 in Thumb mode assuming that r1 is free. */
15813 if (const_store
15814 && REGNO (operands[0]) == REGNO (operands[1])
15815 && INTVAL (operands[4]) != INTVAL (operands[5]))
15817 if (TARGET_THUMB2)
15819 CLEAR_HARD_REG_SET (regset);
15820 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15821 if (tmp == NULL_RTX)
15822 return false;
15824 /* Use the new register in the first load to ensure that
15825 if the original input register is not dead after peephole,
15826 then it will have the correct constant value. */
15827 operands[0] = tmp;
15829 else if (TARGET_ARM)
15831 return false;
15832 int regno = REGNO (operands[0]);
15833 if (!peep2_reg_dead_p (4, operands[0]))
15835 /* When the input register is even and is not dead after the
15836 pattern, it has to hold the second constant but we cannot
15837 form a legal STRD in ARM mode with this register as the second
15838 register. */
15839 if (regno % 2 == 0)
15840 return false;
15842 /* Is regno-1 free? */
15843 SET_HARD_REG_SET (regset);
15844 CLEAR_HARD_REG_BIT(regset, regno - 1);
15845 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15846 if (tmp == NULL_RTX)
15847 return false;
15849 operands[0] = tmp;
15851 else
15853 /* Find a DImode register. */
15854 CLEAR_HARD_REG_SET (regset);
15855 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15856 if (tmp != NULL_RTX)
15858 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15859 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15861 else
15863 /* Can we use the input register to form a DI register? */
15864 SET_HARD_REG_SET (regset);
15865 CLEAR_HARD_REG_BIT(regset,
15866 regno % 2 == 0 ? regno + 1 : regno - 1);
15867 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15868 if (tmp == NULL_RTX)
15869 return false;
15870 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15874 gcc_assert (operands[0] != NULL_RTX);
15875 gcc_assert (operands[1] != NULL_RTX);
15876 gcc_assert (REGNO (operands[0]) % 2 == 0);
15877 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15881 /* Make sure the instructions are ordered with lower memory access first. */
15882 if (offsets[0] > offsets[1])
15884 gap = offsets[0] - offsets[1];
15885 offset = offsets[1];
15887 /* Swap the instructions such that lower memory is accessed first. */
15888 SWAP_RTX (operands[0], operands[1]);
15889 SWAP_RTX (operands[2], operands[3]);
15890 if (const_store)
15891 SWAP_RTX (operands[4], operands[5]);
15893 else
15895 gap = offsets[1] - offsets[0];
15896 offset = offsets[0];
15899 /* Make sure accesses are to consecutive memory locations. */
15900 if (gap != 4)
15901 return false;
15903 /* Make sure we generate legal instructions. */
15904 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15905 false, load))
15906 return true;
15908 /* In Thumb state, where registers are almost unconstrained, there
15909 is little hope to fix it. */
15910 if (TARGET_THUMB2)
15911 return false;
15913 if (load && commute)
15915 /* Try reordering registers. */
15916 SWAP_RTX (operands[0], operands[1]);
15917 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15918 false, load))
15919 return true;
15922 if (const_store)
15924 /* If input registers are dead after this pattern, they can be
15925 reordered or replaced by other registers that are free in the
15926 current pattern. */
15927 if (!peep2_reg_dead_p (4, operands[0])
15928 || !peep2_reg_dead_p (4, operands[1]))
15929 return false;
15931 /* Try to reorder the input registers. */
15932 /* For example, the code
15933 mov r0, 0
15934 mov r1, 1
15935 str r1, [r2]
15936 str r0, [r2, #4]
15937 can be transformed into
15938 mov r1, 0
15939 mov r0, 1
15940 strd r0, r1, [r2]
15942 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15943 false, false))
15945 SWAP_RTX (operands[0], operands[1]);
15946 return true;
15949 /* Try to find a free DI register. */
15950 CLEAR_HARD_REG_SET (regset);
15951 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15952 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15953 while (true)
15955 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15956 if (tmp == NULL_RTX)
15957 return false;
15959 /* DREG must be an even-numbered register in DImode.
15960 Split it into SI registers. */
15961 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15962 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15963 gcc_assert (operands[0] != NULL_RTX);
15964 gcc_assert (operands[1] != NULL_RTX);
15965 gcc_assert (REGNO (operands[0]) % 2 == 0);
15966 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15968 return (operands_ok_ldrd_strd (operands[0], operands[1],
15969 base, offset,
15970 false, load));
15974 return false;
15976 #undef SWAP_RTX
15981 /* Print a symbolic form of X to the debug file, F. */
15982 static void
15983 arm_print_value (FILE *f, rtx x)
15985 switch (GET_CODE (x))
15987 case CONST_INT:
15988 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15989 return;
15991 case CONST_DOUBLE:
15992 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15993 return;
15995 case CONST_VECTOR:
15997 int i;
15999 fprintf (f, "<");
16000 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16002 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16003 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16004 fputc (',', f);
16006 fprintf (f, ">");
16008 return;
16010 case CONST_STRING:
16011 fprintf (f, "\"%s\"", XSTR (x, 0));
16012 return;
16014 case SYMBOL_REF:
16015 fprintf (f, "`%s'", XSTR (x, 0));
16016 return;
16018 case LABEL_REF:
16019 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16020 return;
16022 case CONST:
16023 arm_print_value (f, XEXP (x, 0));
16024 return;
16026 case PLUS:
16027 arm_print_value (f, XEXP (x, 0));
16028 fprintf (f, "+");
16029 arm_print_value (f, XEXP (x, 1));
16030 return;
16032 case PC:
16033 fprintf (f, "pc");
16034 return;
16036 default:
16037 fprintf (f, "????");
16038 return;
16042 /* Routines for manipulation of the constant pool. */
16044 /* Arm instructions cannot load a large constant directly into a
16045 register; they have to come from a pc relative load. The constant
16046 must therefore be placed in the addressable range of the pc
16047 relative load. Depending on the precise pc relative load
16048 instruction the range is somewhere between 256 bytes and 4k. This
16049 means that we often have to dump a constant inside a function, and
16050 generate code to branch around it.
16052 It is important to minimize this, since the branches will slow
16053 things down and make the code larger.
16055 Normally we can hide the table after an existing unconditional
16056 branch so that there is no interruption of the flow, but in the
16057 worst case the code looks like this:
16059 ldr rn, L1
16061 b L2
16062 align
16063 L1: .long value
16067 ldr rn, L3
16069 b L4
16070 align
16071 L3: .long value
16075 We fix this by performing a scan after scheduling, which notices
16076 which instructions need to have their operands fetched from the
16077 constant table and builds the table.
16079 The algorithm starts by building a table of all the constants that
16080 need fixing up and all the natural barriers in the function (places
16081 where a constant table can be dropped without breaking the flow).
16082 For each fixup we note how far the pc-relative replacement will be
16083 able to reach and the offset of the instruction into the function.
16085 Having built the table we then group the fixes together to form
16086 tables that are as large as possible (subject to addressing
16087 constraints) and emit each table of constants after the last
16088 barrier that is within range of all the instructions in the group.
16089 If a group does not contain a barrier, then we forcibly create one
16090 by inserting a jump instruction into the flow. Once the table has
16091 been inserted, the insns are then modified to reference the
16092 relevant entry in the pool.
16094 Possible enhancements to the algorithm (not implemented) are:
16096 1) For some processors and object formats, there may be benefit in
16097 aligning the pools to the start of cache lines; this alignment
16098 would need to be taken into account when calculating addressability
16099 of a pool. */
16101 /* These typedefs are located at the start of this file, so that
16102 they can be used in the prototypes there. This comment is to
16103 remind readers of that fact so that the following structures
16104 can be understood more easily.
16106 typedef struct minipool_node Mnode;
16107 typedef struct minipool_fixup Mfix; */
16109 struct minipool_node
16111 /* Doubly linked chain of entries. */
16112 Mnode * next;
16113 Mnode * prev;
16114 /* The maximum offset into the code at which this entry can be placed. While
16115 pushing fixes for forward references, all entries are sorted in order
16116 of increasing max_address. */
16117 HOST_WIDE_INT max_address;
16118 /* Similarly for an entry inserted for a backwards ref. */
16119 HOST_WIDE_INT min_address;
16120 /* The number of fixes referencing this entry. This can become zero
16121 if we "unpush" an entry. In this case we ignore the entry when we
16122 come to emit the code. */
16123 int refcount;
16124 /* The offset from the start of the minipool. */
16125 HOST_WIDE_INT offset;
16126 /* The value in the table. */
16127 rtx value;
16128 /* The mode of value. */
16129 machine_mode mode;
16130 /* The size of the value. With iWMMXt enabled
16131 sizes > 4 also imply an alignment of 8-bytes. */
16132 int fix_size;
16135 struct minipool_fixup
16137 Mfix * next;
16138 rtx_insn * insn;
16139 HOST_WIDE_INT address;
16140 rtx * loc;
16141 machine_mode mode;
16142 int fix_size;
16143 rtx value;
16144 Mnode * minipool;
16145 HOST_WIDE_INT forwards;
16146 HOST_WIDE_INT backwards;
16149 /* Fixes less than a word need padding out to a word boundary. */
16150 #define MINIPOOL_FIX_SIZE(mode) \
16151 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
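/* For example (illustrative): MINIPOOL_FIX_SIZE (HImode) is 4, since a
   half-word constant still occupies a full word in the pool, while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */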
16153 static Mnode * minipool_vector_head;
16154 static Mnode * minipool_vector_tail;
16155 static rtx_code_label *minipool_vector_label;
16156 static int minipool_pad;
16158 /* The linked list of all minipool fixes required for this function. */
16159 Mfix * minipool_fix_head;
16160 Mfix * minipool_fix_tail;
16161 /* The fix entry for the current minipool, once it has been placed. */
16162 Mfix * minipool_barrier;
16164 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16165 #define JUMP_TABLES_IN_TEXT_SECTION 0
16166 #endif
16168 static HOST_WIDE_INT
16169 get_jump_table_size (rtx_jump_table_data *insn)
16171 /* ADDR_VECs only take room if read-only data goes into the text
16172 section. */
16173 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16175 rtx body = PATTERN (insn);
16176 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16177 HOST_WIDE_INT size;
16178 HOST_WIDE_INT modesize;
16180 modesize = GET_MODE_SIZE (GET_MODE (body));
16181 size = modesize * XVECLEN (body, elt);
16182 switch (modesize)
16184 case 1:
16185 /* Round up size of TBB table to a halfword boundary. */
16186 size = (size + 1) & ~(HOST_WIDE_INT)1;
16187 break;
16188 case 2:
16189 /* No padding necessary for TBH. */
16190 break;
16191 case 4:
16192 /* Add two bytes for alignment on Thumb. */
16193 if (TARGET_THUMB)
16194 size += 2;
16195 break;
16196 default:
16197 gcc_unreachable ();
16199 return size;
16202 return 0;
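/* Example (illustrative, assuming the table lands in the text section):
   a 5-entry QImode ADDR_DIFF_VEC (TBB) takes 5 bytes, rounded up to 6 by
   the halfword alignment above; the same table in HImode (TBH) takes
   exactly 10 bytes.  */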
16205 /* Return the maximum amount of padding that will be inserted before
16206 label LABEL. */
16208 static HOST_WIDE_INT
16209 get_label_padding (rtx label)
16211 HOST_WIDE_INT align, min_insn_size;
16213 align = 1 << label_to_alignment (label);
16214 min_insn_size = TARGET_THUMB ? 2 : 4;
16215 return align > min_insn_size ? align - min_insn_size : 0;
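/* Example (illustrative): a label aligned to an 8-byte boundary can be
   preceded by up to 8 - 2 = 6 bytes of padding in Thumb state, or
   8 - 4 = 4 bytes in ARM state.  */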
16218 /* Move a minipool fix MP from its current location to before MAX_MP.
16219 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16220 constraints may need updating. */
16221 static Mnode *
16222 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16223 HOST_WIDE_INT max_address)
16225 /* The code below assumes these are different. */
16226 gcc_assert (mp != max_mp);
16228 if (max_mp == NULL)
16230 if (max_address < mp->max_address)
16231 mp->max_address = max_address;
16233 else
16235 if (max_address > max_mp->max_address - mp->fix_size)
16236 mp->max_address = max_mp->max_address - mp->fix_size;
16237 else
16238 mp->max_address = max_address;
16240 /* Unlink MP from its current position. Since max_mp is non-null,
16241 mp->prev must be non-null. */
16242 mp->prev->next = mp->next;
16243 if (mp->next != NULL)
16244 mp->next->prev = mp->prev;
16245 else
16246 minipool_vector_tail = mp->prev;
16248 /* Re-insert it before MAX_MP. */
16249 mp->next = max_mp;
16250 mp->prev = max_mp->prev;
16251 max_mp->prev = mp;
16253 if (mp->prev != NULL)
16254 mp->prev->next = mp;
16255 else
16256 minipool_vector_head = mp;
16259 /* Save the new entry. */
16260 max_mp = mp;
16262 /* Scan over the preceding entries and adjust their addresses as
16263 required. */
16264 while (mp->prev != NULL
16265 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16267 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16268 mp = mp->prev;
16271 return max_mp;
16274 /* Add a constant to the minipool for a forward reference. Returns the
16275 node added or NULL if the constant will not fit in this pool. */
16276 static Mnode *
16277 add_minipool_forward_ref (Mfix *fix)
16279 /* If set, max_mp is the first pool_entry that has a lower
16280 constraint than the one we are trying to add. */
16281 Mnode * max_mp = NULL;
16282 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16283 Mnode * mp;
16285 /* If the minipool starts before the end of FIX->INSN then this FIX
16286 can not be placed into the current pool. Furthermore, adding the
16287 new constant pool entry may cause the pool to start FIX_SIZE bytes
16288 earlier. */
16289 if (minipool_vector_head &&
16290 (fix->address + get_attr_length (fix->insn)
16291 >= minipool_vector_head->max_address - fix->fix_size))
16292 return NULL;
16294 /* Scan the pool to see if a constant with the same value has
16295 already been added. While we are doing this, also note the
16296 location where we must insert the constant if it doesn't already
16297 exist. */
16298 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16300 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16301 && fix->mode == mp->mode
16302 && (!LABEL_P (fix->value)
16303 || (CODE_LABEL_NUMBER (fix->value)
16304 == CODE_LABEL_NUMBER (mp->value)))
16305 && rtx_equal_p (fix->value, mp->value))
16307 /* More than one fix references this entry. */
16308 mp->refcount++;
16309 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16312 /* Note the insertion point if necessary. */
16313 if (max_mp == NULL
16314 && mp->max_address > max_address)
16315 max_mp = mp;
16317 /* If we are inserting an 8-byte aligned quantity and
16318 we have not already found an insertion point, then
16319 make sure that all such 8-byte aligned quantities are
16320 placed at the start of the pool. */
16321 if (ARM_DOUBLEWORD_ALIGN
16322 && max_mp == NULL
16323 && fix->fix_size >= 8
16324 && mp->fix_size < 8)
16326 max_mp = mp;
16327 max_address = mp->max_address;
16331 /* The value is not currently in the minipool, so we need to create
16332 a new entry for it. If MAX_MP is NULL, the entry will be put on
16333 the end of the list since the placement is less constrained than
16334 any existing entry. Otherwise, we insert the new fix before
16335 MAX_MP and, if necessary, adjust the constraints on the other
16336 entries. */
16337 mp = XNEW (Mnode);
16338 mp->fix_size = fix->fix_size;
16339 mp->mode = fix->mode;
16340 mp->value = fix->value;
16341 mp->refcount = 1;
16342 /* Not yet required for a backwards ref. */
16343 mp->min_address = -65536;
16345 if (max_mp == NULL)
16347 mp->max_address = max_address;
16348 mp->next = NULL;
16349 mp->prev = minipool_vector_tail;
16351 if (mp->prev == NULL)
16353 minipool_vector_head = mp;
16354 minipool_vector_label = gen_label_rtx ();
16356 else
16357 mp->prev->next = mp;
16359 minipool_vector_tail = mp;
16361 else
16363 if (max_address > max_mp->max_address - mp->fix_size)
16364 mp->max_address = max_mp->max_address - mp->fix_size;
16365 else
16366 mp->max_address = max_address;
16368 mp->next = max_mp;
16369 mp->prev = max_mp->prev;
16370 max_mp->prev = mp;
16371 if (mp->prev != NULL)
16372 mp->prev->next = mp;
16373 else
16374 minipool_vector_head = mp;
16377 /* Save the new entry. */
16378 max_mp = mp;
16380 /* Scan over the preceding entries and adjust their addresses as
16381 required. */
16382 while (mp->prev != NULL
16383 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16385 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16386 mp = mp->prev;
16389 return max_mp;
16392 static Mnode *
16393 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16394 HOST_WIDE_INT min_address)
16396 HOST_WIDE_INT offset;
16398 /* The code below assumes these are different. */
16399 gcc_assert (mp != min_mp);
16401 if (min_mp == NULL)
16403 if (min_address > mp->min_address)
16404 mp->min_address = min_address;
16406 else
16408 /* We will adjust this below if it is too loose. */
16409 mp->min_address = min_address;
16411 /* Unlink MP from its current position. Since min_mp is non-null,
16412 mp->next must be non-null. */
16413 mp->next->prev = mp->prev;
16414 if (mp->prev != NULL)
16415 mp->prev->next = mp->next;
16416 else
16417 minipool_vector_head = mp->next;
16419 /* Reinsert it after MIN_MP. */
16420 mp->prev = min_mp;
16421 mp->next = min_mp->next;
16422 min_mp->next = mp;
16423 if (mp->next != NULL)
16424 mp->next->prev = mp;
16425 else
16426 minipool_vector_tail = mp;
16429 min_mp = mp;
16431 offset = 0;
16432 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16434 mp->offset = offset;
16435 if (mp->refcount > 0)
16436 offset += mp->fix_size;
16438 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16439 mp->next->min_address = mp->min_address + mp->fix_size;
16442 return min_mp;
16445 /* Add a constant to the minipool for a backward reference. Returns the
16446 node added or NULL if the constant will not fit in this pool.
16448 Note that the code for insertion for a backwards reference can be
16449 somewhat confusing because the calculated offsets for each fix do
16450 not take into account the size of the pool (which is still under
16451 construction). */
16452 static Mnode *
16453 add_minipool_backward_ref (Mfix *fix)
16455 /* If set, min_mp is the last pool_entry that has a lower constraint
16456 than the one we are trying to add. */
16457 Mnode *min_mp = NULL;
16458 /* This can be negative, since it is only a constraint. */
16459 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16460 Mnode *mp;
16462 /* If we can't reach the current pool from this insn, or if we can't
16463 insert this entry at the end of the pool without pushing other
16464 fixes out of range, then we don't try. This ensures that we
16465 can't fail later on. */
16466 if (min_address >= minipool_barrier->address
16467 || (minipool_vector_tail->min_address + fix->fix_size
16468 >= minipool_barrier->address))
16469 return NULL;
16471 /* Scan the pool to see if a constant with the same value has
16472 already been added. While we are doing this, also note the
16473 location where we must insert the constant if it doesn't already
16474 exist. */
16475 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16477 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16478 && fix->mode == mp->mode
16479 && (!LABEL_P (fix->value)
16480 || (CODE_LABEL_NUMBER (fix->value)
16481 == CODE_LABEL_NUMBER (mp->value)))
16482 && rtx_equal_p (fix->value, mp->value)
16483 /* Check that there is enough slack to move this entry to the
16484 end of the table (this is conservative). */
16485 && (mp->max_address
16486 > (minipool_barrier->address
16487 + minipool_vector_tail->offset
16488 + minipool_vector_tail->fix_size)))
16490 mp->refcount++;
16491 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16494 if (min_mp != NULL)
16495 mp->min_address += fix->fix_size;
16496 else
16498 /* Note the insertion point if necessary. */
16499 if (mp->min_address < min_address)
16501 /* For now, we do not allow the insertion of 8-byte alignment
16502 requiring nodes anywhere but at the start of the pool. */
16503 if (ARM_DOUBLEWORD_ALIGN
16504 && fix->fix_size >= 8 && mp->fix_size < 8)
16505 return NULL;
16506 else
16507 min_mp = mp;
16509 else if (mp->max_address
16510 < minipool_barrier->address + mp->offset + fix->fix_size)
16512 /* Inserting before this entry would push the fix beyond
16513 its maximum address (which can happen if we have
16514 re-located a forwards fix); force the new fix to come
16515 after it. */
16516 if (ARM_DOUBLEWORD_ALIGN
16517 && fix->fix_size >= 8 && mp->fix_size < 8)
16518 return NULL;
16519 else
16521 min_mp = mp;
16522 min_address = mp->min_address + fix->fix_size;
16525 /* Do not insert a non-8-byte aligned quantity before 8-byte
16526 aligned quantities. */
16527 else if (ARM_DOUBLEWORD_ALIGN
16528 && fix->fix_size < 8
16529 && mp->fix_size >= 8)
16531 min_mp = mp;
16532 min_address = mp->min_address + fix->fix_size;
16537 /* We need to create a new entry. */
16538 mp = XNEW (Mnode);
16539 mp->fix_size = fix->fix_size;
16540 mp->mode = fix->mode;
16541 mp->value = fix->value;
16542 mp->refcount = 1;
16543 mp->max_address = minipool_barrier->address + 65536;
16545 mp->min_address = min_address;
16547 if (min_mp == NULL)
16549 mp->prev = NULL;
16550 mp->next = minipool_vector_head;
16552 if (mp->next == NULL)
16554 minipool_vector_tail = mp;
16555 minipool_vector_label = gen_label_rtx ();
16557 else
16558 mp->next->prev = mp;
16560 minipool_vector_head = mp;
16562 else
16564 mp->next = min_mp->next;
16565 mp->prev = min_mp;
16566 min_mp->next = mp;
16568 if (mp->next != NULL)
16569 mp->next->prev = mp;
16570 else
16571 minipool_vector_tail = mp;
16574 /* Save the new entry. */
16575 min_mp = mp;
16577 if (mp->prev)
16578 mp = mp->prev;
16579 else
16580 mp->offset = 0;
16582 /* Scan over the following entries and adjust their offsets. */
16583 while (mp->next != NULL)
16585 if (mp->next->min_address < mp->min_address + mp->fix_size)
16586 mp->next->min_address = mp->min_address + mp->fix_size;
16588 if (mp->refcount)
16589 mp->next->offset = mp->offset + mp->fix_size;
16590 else
16591 mp->next->offset = mp->offset;
16593 mp = mp->next;
16596 return min_mp;
16599 static void
16600 assign_minipool_offsets (Mfix *barrier)
16602 HOST_WIDE_INT offset = 0;
16603 Mnode *mp;
16605 minipool_barrier = barrier;
16607 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16609 mp->offset = offset;
16611 if (mp->refcount > 0)
16612 offset += mp->fix_size;
16616 /* Output the literal table */
16617 static void
16618 dump_minipool (rtx_insn *scan)
16620 Mnode * mp;
16621 Mnode * nmp;
16622 int align64 = 0;
16624 if (ARM_DOUBLEWORD_ALIGN)
16625 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16626 if (mp->refcount > 0 && mp->fix_size >= 8)
16628 align64 = 1;
16629 break;
16632 if (dump_file)
16633 fprintf (dump_file,
16634 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16635 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16637 scan = emit_label_after (gen_label_rtx (), scan);
16638 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16639 scan = emit_label_after (minipool_vector_label, scan);
16641 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16643 if (mp->refcount > 0)
16645 if (dump_file)
16647 fprintf (dump_file,
16648 ";; Offset %u, min %ld, max %ld ",
16649 (unsigned) mp->offset, (unsigned long) mp->min_address,
16650 (unsigned long) mp->max_address);
16651 arm_print_value (dump_file, mp->value);
16652 fputc ('\n', dump_file);
16655 switch (mp->fix_size)
16657 #ifdef HAVE_consttable_1
16658 case 1:
16659 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16660 break;
16662 #endif
16663 #ifdef HAVE_consttable_2
16664 case 2:
16665 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16666 break;
16668 #endif
16669 #ifdef HAVE_consttable_4
16670 case 4:
16671 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16672 break;
16674 #endif
16675 #ifdef HAVE_consttable_8
16676 case 8:
16677 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16678 break;
16680 #endif
16681 #ifdef HAVE_consttable_16
16682 case 16:
16683 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16684 break;
16686 #endif
16687 default:
16688 gcc_unreachable ();
16692 nmp = mp->next;
16693 free (mp);
16696 minipool_vector_head = minipool_vector_tail = NULL;
16697 scan = emit_insn_after (gen_consttable_end (), scan);
16698 scan = emit_barrier_after (scan);
16701 /* Return the cost of forcibly inserting a barrier after INSN. */
16702 static int
16703 arm_barrier_cost (rtx insn)
16705 /* Basing the location of the pool on the loop depth is preferable,
16706 but at the moment, the basic block information seems to be
16707 corrupt by this stage of the compilation. */
16708 int base_cost = 50;
16709 rtx next = next_nonnote_insn (insn);
16711 if (next != NULL && LABEL_P (next))
16712 base_cost -= 20;
16714 switch (GET_CODE (insn))
16716 case CODE_LABEL:
16717 /* It will always be better to place the table before the label, rather
16718 than after it. */
16719 return 50;
16721 case INSN:
16722 case CALL_INSN:
16723 return base_cost;
16725 case JUMP_INSN:
16726 return base_cost - 10;
16728 default:
16729 return base_cost + 10;
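/* Example (illustrative): an unconditional jump that is immediately
   followed by a label scores 50 - 20 - 10 = 20, making it one of the
   cheapest places to force a pool barrier; lower costs are preferred by
   create_fix_barrier below.  */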
16733 /* Find the best place in the insn stream in the range
16734 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16735 Create the barrier by inserting a jump and add a new fix entry for
16736 it. */
16737 static Mfix *
16738 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16740 HOST_WIDE_INT count = 0;
16741 rtx_barrier *barrier;
16742 rtx_insn *from = fix->insn;
16743 /* The instruction after which we will insert the jump. */
16744 rtx_insn *selected = NULL;
16745 int selected_cost;
16746 /* The address at which the jump instruction will be placed. */
16747 HOST_WIDE_INT selected_address;
16748 Mfix * new_fix;
16749 HOST_WIDE_INT max_count = max_address - fix->address;
16750 rtx_code_label *label = gen_label_rtx ();
16752 selected_cost = arm_barrier_cost (from);
16753 selected_address = fix->address;
16755 while (from && count < max_count)
16757 rtx_jump_table_data *tmp;
16758 int new_cost;
16760 /* This code shouldn't have been called if there was a natural barrier
16761 within range. */
16762 gcc_assert (!BARRIER_P (from));
16764 /* Count the length of this insn. This must stay in sync with the
16765 code that pushes minipool fixes. */
16766 if (LABEL_P (from))
16767 count += get_label_padding (from);
16768 else
16769 count += get_attr_length (from);
16771 /* If there is a jump table, add its length. */
16772 if (tablejump_p (from, NULL, &tmp))
16774 count += get_jump_table_size (tmp);
16776 /* Jump tables aren't in a basic block, so base the cost on
16777 the dispatch insn. If we select this location, we will
16778 still put the pool after the table. */
16779 new_cost = arm_barrier_cost (from);
16781 if (count < max_count
16782 && (!selected || new_cost <= selected_cost))
16784 selected = tmp;
16785 selected_cost = new_cost;
16786 selected_address = fix->address + count;
16789 /* Continue after the dispatch table. */
16790 from = NEXT_INSN (tmp);
16791 continue;
16794 new_cost = arm_barrier_cost (from);
16796 if (count < max_count
16797 && (!selected || new_cost <= selected_cost))
16799 selected = from;
16800 selected_cost = new_cost;
16801 selected_address = fix->address + count;
16804 from = NEXT_INSN (from);
16807 /* Make sure that we found a place to insert the jump. */
16808 gcc_assert (selected);
16810 /* Make sure we do not split a call and its corresponding
16811 CALL_ARG_LOCATION note. */
16812 if (CALL_P (selected))
16814 rtx_insn *next = NEXT_INSN (selected);
16815 if (next && NOTE_P (next)
16816 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16817 selected = next;
16820 /* Create a new JUMP_INSN that branches around a barrier. */
16821 from = emit_jump_insn_after (gen_jump (label), selected);
16822 JUMP_LABEL (from) = label;
16823 barrier = emit_barrier_after (from);
16824 emit_label_after (label, barrier);
16826 /* Create a minipool barrier entry for the new barrier. */
16827 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16828 new_fix->insn = barrier;
16829 new_fix->address = selected_address;
16830 new_fix->next = fix->next;
16831 fix->next = new_fix;
16833 return new_fix;
16836 /* Record that there is a natural barrier in the insn stream at
16837 ADDRESS. */
16838 static void
16839 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16841 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16843 fix->insn = insn;
16844 fix->address = address;
16846 fix->next = NULL;
16847 if (minipool_fix_head != NULL)
16848 minipool_fix_tail->next = fix;
16849 else
16850 minipool_fix_head = fix;
16852 minipool_fix_tail = fix;
16855 /* Record INSN, which will need fixing up to load a value from the
16856 minipool. ADDRESS is the offset of the insn since the start of the
16857 function; LOC is a pointer to the part of the insn which requires
16858 fixing; VALUE is the constant that must be loaded, which is of type
16859 MODE. */
16860 static void
16861 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16862 machine_mode mode, rtx value)
16864 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16866 fix->insn = insn;
16867 fix->address = address;
16868 fix->loc = loc;
16869 fix->mode = mode;
16870 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16871 fix->value = value;
16872 fix->forwards = get_attr_pool_range (insn);
16873 fix->backwards = get_attr_neg_pool_range (insn);
16874 fix->minipool = NULL;
16876 /* If an insn doesn't have a range defined for it, then it isn't
16877 expecting to be reworked by this code. Better to stop now than
16878 to generate duff assembly code. */
16879 gcc_assert (fix->forwards || fix->backwards);
16881 /* If an entry requires 8-byte alignment then assume all constant pools
16882 require 4 bytes of padding. Trying to do this later on a per-pool
16883 basis is awkward because existing pool entries have to be modified. */
16884 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16885 minipool_pad = 4;
16887 if (dump_file)
16889 fprintf (dump_file,
16890 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16891 GET_MODE_NAME (mode),
16892 INSN_UID (insn), (unsigned long) address,
16893 -1 * (long)fix->backwards, (long)fix->forwards);
16894 arm_print_value (dump_file, fix->value);
16895 fprintf (dump_file, "\n");
16898 /* Add it to the chain of fixes. */
16899 fix->next = NULL;
16901 if (minipool_fix_head != NULL)
16902 minipool_fix_tail->next = fix;
16903 else
16904 minipool_fix_head = fix;
16906 minipool_fix_tail = fix;
16909 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16910 Returns the number of insns needed, or 99 if we always want to synthesize
16911 the value. */
16913 arm_max_const_double_inline_cost ()
16915 /* Let the value get synthesized to avoid the use of literal pools. */
16916 if (arm_disable_literal_pool)
16917 return 99;
16919 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16922 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16923 Returns the number of insns needed, or 99 if we don't know how to
16924 do it. */
16926 arm_const_double_inline_cost (rtx val)
16928 rtx lowpart, highpart;
16929 machine_mode mode;
16931 mode = GET_MODE (val);
16933 if (mode == VOIDmode)
16934 mode = DImode;
16936 gcc_assert (GET_MODE_SIZE (mode) == 8);
16938 lowpart = gen_lowpart (SImode, val);
16939 highpart = gen_highpart_mode (SImode, mode, val);
16941 gcc_assert (CONST_INT_P (lowpart));
16942 gcc_assert (CONST_INT_P (highpart));
16944 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16945 NULL_RTX, NULL_RTX, 0, 0)
16946 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16947 NULL_RTX, NULL_RTX, 0, 0));
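/* Example (illustrative): for the DImode constant 0x100000001 both the
   low and the high SImode halves are 1, each synthesizable in a single
   insn, so the cost returned is 2 -- within the budget given by
   arm_max_const_double_inline_cost above.  */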
16950 /* Cost of loading a SImode constant. */
16951 static inline int
16952 arm_const_inline_cost (enum rtx_code code, rtx val)
16954 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16955 NULL_RTX, NULL_RTX, 1, 0);
16958 /* Return true if it is worthwhile to split a 64-bit constant into two
16959 32-bit operations. This is the case if optimizing for size, or
16960 if we have load delay slots, or if one 32-bit part can be done with
16961 a single data operation. */
16962 bool
16963 arm_const_double_by_parts (rtx val)
16965 machine_mode mode = GET_MODE (val);
16966 rtx part;
16968 if (optimize_size || arm_ld_sched)
16969 return true;
16971 if (mode == VOIDmode)
16972 mode = DImode;
16974 part = gen_highpart_mode (SImode, mode, val);
16976 gcc_assert (CONST_INT_P (part));
16978 if (const_ok_for_arm (INTVAL (part))
16979 || const_ok_for_arm (~INTVAL (part)))
16980 return true;
16982 part = gen_lowpart (SImode, val);
16984 gcc_assert (CONST_INT_P (part));
16986 if (const_ok_for_arm (INTVAL (part))
16987 || const_ok_for_arm (~INTVAL (part)))
16988 return true;
16990 return false;
16993 /* Return true if it is possible to inline both the high and low parts
16994 of a 64-bit constant into 32-bit data processing instructions. */
16995 bool
16996 arm_const_double_by_immediates (rtx val)
16998 machine_mode mode = GET_MODE (val);
16999 rtx part;
17001 if (mode == VOIDmode)
17002 mode = DImode;
17004 part = gen_highpart_mode (SImode, mode, val);
17006 gcc_assert (CONST_INT_P (part));
17008 if (!const_ok_for_arm (INTVAL (part)))
17009 return false;
17011 part = gen_lowpart (SImode, val);
17013 gcc_assert (CONST_INT_P (part));
17015 if (!const_ok_for_arm (INTVAL (part)))
17016 return false;
17018 return true;
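/* Example (illustrative): 0x00ff00000000001f satisfies this test, since
   0x00ff0000 and 0x1f are both valid ARM data-processing immediates,
   whereas a value such as 0x12345678deadbeef does not and would have to
   be loaded from the pool or synthesized piecewise.  */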
17021 /* Scan INSN and note any of its operands that need fixing.
17022 If DO_PUSHES is false we do not actually push any of the fixups
17023 needed. */
17024 static void
17025 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17027 int opno;
17029 extract_constrain_insn (insn);
17031 if (recog_data.n_alternatives == 0)
17032 return;
17034 /* Fill in recog_op_alt with information about the constraints of
17035 this insn. */
17036 preprocess_constraints (insn);
17038 const operand_alternative *op_alt = which_op_alt ();
17039 for (opno = 0; opno < recog_data.n_operands; opno++)
17041 /* Things we need to fix can only occur in inputs. */
17042 if (recog_data.operand_type[opno] != OP_IN)
17043 continue;
17045 /* If this alternative is a memory reference, then any mention
17046 of constants in this alternative is really to fool reload
17047 into allowing us to accept one there. We need to fix them up
17048 now so that we output the right code. */
17049 if (op_alt[opno].memory_ok)
17051 rtx op = recog_data.operand[opno];
17053 if (CONSTANT_P (op))
17055 if (do_pushes)
17056 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17057 recog_data.operand_mode[opno], op);
17059 else if (MEM_P (op)
17060 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17061 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17063 if (do_pushes)
17065 rtx cop = avoid_constant_pool_reference (op);
17067 /* Casting the address of something to a mode narrower
17068 than a word can cause avoid_constant_pool_reference()
17069 to return the pool reference itself. That's no good to
17070 us here. Let's just hope that we can use the
17071 constant pool value directly. */
17072 if (op == cop)
17073 cop = get_pool_constant (XEXP (op, 0));
17075 push_minipool_fix (insn, address,
17076 recog_data.operand_loc[opno],
17077 recog_data.operand_mode[opno], cop);
17084 return;
17087 /* Rewrite move insn into subtract of 0 if the condition codes will
17088 be useful in next conditional jump insn. */
17090 static void
17091 thumb1_reorg (void)
17093 basic_block bb;
17095 FOR_EACH_BB_FN (bb, cfun)
17097 rtx dest, src;
17098 rtx pat, op0, set = NULL;
17099 rtx_insn *prev, *insn = BB_END (bb);
17100 bool insn_clobbered = false;
17102 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17103 insn = PREV_INSN (insn);
17105 /* Find the last cbranchsi4_insn in basic block BB. */
17106 if (insn == BB_HEAD (bb)
17107 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17108 continue;
17110 /* Get the register with which we are comparing. */
17111 pat = PATTERN (insn);
17112 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn != BB_HEAD (bb));
17116 for (prev = PREV_INSN (insn);
17117 (!insn_clobbered
17118 && prev != BB_HEAD (bb)
17119 && (NOTE_P (prev)
17120 || DEBUG_INSN_P (prev)
17121 || ((set = single_set (prev)) != NULL
17122 && get_attr_conds (prev) == CONDS_NOCOND)));
17123 prev = PREV_INSN (prev))
17125 if (reg_set_p (op0, prev))
17126 insn_clobbered = true;
17129 /* Skip if op0 is clobbered by insn other than prev. */
17130 if (insn_clobbered)
17131 continue;
17133 if (!set)
17134 continue;
17136 dest = SET_DEST (set);
17137 src = SET_SRC (set);
17138 if (!low_register_operand (dest, SImode)
17139 || !low_register_operand (src, SImode))
17140 continue;
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17146 dest = copy_rtx (dest);
17147 src = copy_rtx (src);
17148 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17149 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17150 INSN_CODE (prev) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17153 INSN_CODE (insn) = -1;
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
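/* Example (illustrative): when the condition flags are dead, a three-
   register 'add r0, r1, r2' that would otherwise need a 32-bit ADD.W
   encoding can be rewritten as the flag-setting 'adds r0, r1, r2', which
   has a 16-bit encoding; the loop below does this by wrapping the SET in
   a PARALLEL with a clobber of CC_REGNUM.  */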
17161 static void
17162 thumb2_reorg (void)
17164 basic_block bb;
17165 regset_head live;
17167 INIT_REG_SET (&live);
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17172 df_analyze ();
17174 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17176 FOR_EACH_BB_FN (bb, cfun)
17178 if (current_tune->disparage_flag_setting_t16_encodings
17179 && optimize_bb_for_speed_p (bb))
17180 continue;
17182 rtx_insn *insn;
17183 Convert_Action action = SKIP;
17184 Convert_Action action_for_partial_flag_setting
17185 = (current_tune->disparage_partial_flag_setting_t16_encodings
17186 && optimize_bb_for_speed_p (bb))
17187 ? SKIP : CONV;
17189 COPY_REG_SET (&live, DF_LR_OUT (bb));
17190 df_simulate_initialize_backwards (bb, &live);
17191 FOR_BB_INSNS_REVERSE (bb, insn)
17193 if (NONJUMP_INSN_P (insn)
17194 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17195 && GET_CODE (PATTERN (insn)) == SET)
17197 action = SKIP;
17198 rtx pat = PATTERN (insn);
17199 rtx dst = XEXP (pat, 0);
17200 rtx src = XEXP (pat, 1);
17201 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17203 if (!OBJECT_P (src))
17204 op0 = XEXP (src, 0);
17206 if (BINARY_P (src))
17207 op1 = XEXP (src, 1);
17209 if (low_register_operand (dst, SImode))
17211 switch (GET_CODE (src))
17213 case PLUS:
17214 /* Adding two registers and storing the result
17215 in the first source is already a 16-bit
17216 operation. */
17217 if (rtx_equal_p (dst, op0)
17218 && register_operand (op1, SImode))
17219 break;
17221 if (low_register_operand (op0, SImode))
17223 /* ADDS <Rd>,<Rn>,<Rm> */
17224 if (low_register_operand (op1, SImode))
17225 action = CONV;
17226 /* ADDS <Rdn>,#<imm8> */
17227 /* SUBS <Rdn>,#<imm8> */
17228 else if (rtx_equal_p (dst, op0)
17229 && CONST_INT_P (op1)
17230 && IN_RANGE (INTVAL (op1), -255, 255))
17231 action = CONV;
17232 /* ADDS <Rd>,<Rn>,#<imm3> */
17233 /* SUBS <Rd>,<Rn>,#<imm3> */
17234 else if (CONST_INT_P (op1)
17235 && IN_RANGE (INTVAL (op1), -7, 7))
17236 action = CONV;
17238 /* ADCS <Rd>, <Rn> */
17239 else if (GET_CODE (XEXP (src, 0)) == PLUS
17240 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17241 && low_register_operand (XEXP (XEXP (src, 0), 1),
17242 SImode)
17243 && COMPARISON_P (op1)
17244 && cc_register (XEXP (op1, 0), VOIDmode)
17245 && maybe_get_arm_condition_code (op1) == ARM_CS
17246 && XEXP (op1, 1) == const0_rtx)
17247 action = CONV;
17248 break;
17250 case MINUS:
17251 /* RSBS <Rd>,<Rn>,#0
17252 Not handled here: see NEG below. */
17253 /* SUBS <Rd>,<Rn>,#<imm3>
17254 SUBS <Rdn>,#<imm8>
17255 Not handled here: see PLUS above. */
17256 /* SUBS <Rd>,<Rn>,<Rm> */
17257 if (low_register_operand (op0, SImode)
17258 && low_register_operand (op1, SImode))
17259 action = CONV;
17260 break;
17262 case MULT:
17263 /* MULS <Rdm>,<Rn>,<Rdm>
17264 As an exception to the rule, this is only used
17265 when optimizing for size since MULS is slow on all
17266 known implementations. We do not even want to use
17267 MULS in cold code, if optimizing for speed, so we
17268 test the global flag here. */
17269 if (!optimize_size)
17270 break;
17271 /* else fall through. */
17272 case AND:
17273 case IOR:
17274 case XOR:
17275 /* ANDS <Rdn>,<Rm> */
17276 if (rtx_equal_p (dst, op0)
17277 && low_register_operand (op1, SImode))
17278 action = action_for_partial_flag_setting;
17279 else if (rtx_equal_p (dst, op1)
17280 && low_register_operand (op0, SImode))
17281 action = action_for_partial_flag_setting == SKIP
17282 ? SKIP : SWAP_CONV;
17283 break;
17285 case ASHIFTRT:
17286 case ASHIFT:
17287 case LSHIFTRT:
17288 /* ASRS <Rdn>,<Rm> */
17289 /* LSRS <Rdn>,<Rm> */
17290 /* LSLS <Rdn>,<Rm> */
17291 if (rtx_equal_p (dst, op0)
17292 && low_register_operand (op1, SImode))
17293 action = action_for_partial_flag_setting;
17294 /* ASRS <Rd>,<Rm>,#<imm5> */
17295 /* LSRS <Rd>,<Rm>,#<imm5> */
17296 /* LSLS <Rd>,<Rm>,#<imm5> */
17297 else if (low_register_operand (op0, SImode)
17298 && CONST_INT_P (op1)
17299 && IN_RANGE (INTVAL (op1), 0, 31))
17300 action = action_for_partial_flag_setting;
17301 break;
17303 case ROTATERT:
17304 /* RORS <Rdn>,<Rm> */
17305 if (rtx_equal_p (dst, op0)
17306 && low_register_operand (op1, SImode))
17307 action = action_for_partial_flag_setting;
17308 break;
17310 case NOT:
17311 /* MVNS <Rd>,<Rm> */
17312 if (low_register_operand (op0, SImode))
17313 action = action_for_partial_flag_setting;
17314 break;
17316 case NEG:
17317 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17318 if (low_register_operand (op0, SImode))
17319 action = CONV;
17320 break;
17322 case CONST_INT:
17323 /* MOVS <Rd>,#<imm8> */
17324 if (CONST_INT_P (src)
17325 && IN_RANGE (INTVAL (src), 0, 255))
17326 action = action_for_partial_flag_setting;
17327 break;
17329 case REG:
17330 /* MOVS and MOV<c> with registers have different
17331 encodings, so are not relevant here. */
17332 break;
17334 default:
17335 break;
17339 if (action != SKIP)
17341 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17342 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17343 rtvec vec;
17345 if (action == SWAP_CONV)
17347 src = copy_rtx (src);
17348 XEXP (src, 0) = op1;
17349 XEXP (src, 1) = op0;
17350 pat = gen_rtx_SET (VOIDmode, dst, src);
17351 vec = gen_rtvec (2, pat, clobber);
17353 else /* action == CONV */
17354 vec = gen_rtvec (2, pat, clobber);
17356 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17357 INSN_CODE (insn) = -1;
17361 if (NONDEBUG_INSN_P (insn))
17362 df_simulate_one_insn_backwards (bb, insn, &live);
17366 CLEAR_REG_SET (&live);
17369 /* Gcc puts the pool in the wrong place for ARM, since we can only
17370 load addresses a limited distance around the pc. We do some
17371 special munging to move the constant pool values to the correct
17372 point in the code. */
17373 static void
17374 arm_reorg (void)
17376 rtx_insn *insn;
17377 HOST_WIDE_INT address = 0;
17378 Mfix * fix;
17380 if (TARGET_THUMB1)
17381 thumb1_reorg ();
17382 else if (TARGET_THUMB2)
17383 thumb2_reorg ();
17385 /* Ensure all insns that must be split have been split at this point.
17386 Otherwise, the pool placement code below may compute incorrect
17387 insn lengths. Note that when optimizing, all insns have already
17388 been split at this point. */
17389 if (!optimize)
17390 split_all_insns_noflow ();
17392 minipool_fix_head = minipool_fix_tail = NULL;
17394 /* The first insn must always be a note, or the code below won't
17395 scan it properly. */
17396 insn = get_insns ();
17397 gcc_assert (NOTE_P (insn));
17398 minipool_pad = 0;
17400 /* Scan all the insns and record the operands that will need fixing. */
17401 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17403 if (BARRIER_P (insn))
17404 push_minipool_barrier (insn, address);
17405 else if (INSN_P (insn))
17407 rtx_jump_table_data *table;
17409 note_invalid_constants (insn, address, true);
17410 address += get_attr_length (insn);
17412 /* If the insn is a vector jump, add the size of the table
17413 and skip the table. */
17414 if (tablejump_p (insn, NULL, &table))
17416 address += get_jump_table_size (table);
17417 insn = table;
17420 else if (LABEL_P (insn))
17421 /* Add the worst-case padding due to alignment. We don't add
17422 the _current_ padding because the minipool insertions
17423 themselves might change it. */
17424 address += get_label_padding (insn);
17427 fix = minipool_fix_head;
17429 /* Now scan the fixups and perform the required changes. */
17430 while (fix)
17432 Mfix * ftmp;
17433 Mfix * fdel;
17434 Mfix * last_added_fix;
17435 Mfix * last_barrier = NULL;
17436 Mfix * this_fix;
17438 /* Skip any further barriers before the next fix. */
17439 while (fix && BARRIER_P (fix->insn))
17440 fix = fix->next;
17442 /* No more fixes. */
17443 if (fix == NULL)
17444 break;
17446 last_added_fix = NULL;
17448 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17450 if (BARRIER_P (ftmp->insn))
17452 if (ftmp->address >= minipool_vector_head->max_address)
17453 break;
17455 last_barrier = ftmp;
17457 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17458 break;
17460 last_added_fix = ftmp; /* Keep track of the last fix added. */
17463 /* If we found a barrier, drop back to that; any fixes that we
17464 could have reached but come after the barrier will now go in
17465 the next mini-pool. */
17466 if (last_barrier != NULL)
17468 /* Reduce the refcount for those fixes that won't go into this
17469 pool after all. */
17470 for (fdel = last_barrier->next;
17471 fdel && fdel != ftmp;
17472 fdel = fdel->next)
17474 fdel->minipool->refcount--;
17475 fdel->minipool = NULL;
17478 ftmp = last_barrier;
17480 else
17482 /* ftmp is the first fix that we can't fit into this pool and
17483 there are no natural barriers that we could use. Insert a
17484 new barrier in the code somewhere between the previous
17485 fix and this one, and arrange to jump around it. */
17486 HOST_WIDE_INT max_address;
17488 /* The last item on the list of fixes must be a barrier, so
17489 we can never run off the end of the list of fixes without
17490 last_barrier being set. */
17491 gcc_assert (ftmp);
17493 max_address = minipool_vector_head->max_address;
17494 /* Check that there isn't another fix that is in range that
17495 we couldn't fit into this pool because the pool was
17496 already too large: we need to put the pool before such an
17497 instruction. The pool itself may come just after the
17498 fix because create_fix_barrier also allows space for a
17499 jump instruction. */
17500 if (ftmp->address < max_address)
17501 max_address = ftmp->address + 1;
17503 last_barrier = create_fix_barrier (last_added_fix, max_address);
17506 assign_minipool_offsets (last_barrier);
17508 while (ftmp)
17510 if (!BARRIER_P (ftmp->insn)
17511 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17512 == NULL))
17513 break;
17515 ftmp = ftmp->next;
17518 /* Scan over the fixes we have identified for this pool, fixing them
17519 up and adding the constants to the pool itself. */
17520 for (this_fix = fix; this_fix && ftmp != this_fix;
17521 this_fix = this_fix->next)
17522 if (!BARRIER_P (this_fix->insn))
17524 rtx addr
17525 = plus_constant (Pmode,
17526 gen_rtx_LABEL_REF (VOIDmode,
17527 minipool_vector_label),
17528 this_fix->minipool->offset);
17529 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17532 dump_minipool (last_barrier->insn);
17533 fix = ftmp;
17536 /* From now on we must synthesize any constants that we can't handle
17537 directly. This can happen if the RTL gets split during final
17538 instruction generation. */
17539 cfun->machine->after_arm_reorg = 1;
17541 /* Free the minipool memory. */
17542 obstack_free (&minipool_obstack, minipool_startobj);
17545 /* Routines to output assembly language. */
17547 /* Return string representation of passed in real value. */
17548 static const char *
17549 fp_const_from_val (REAL_VALUE_TYPE *r)
17551 if (!fp_consts_inited)
17552 init_fp_table ();
17554 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17555 return "0";
17558 /* OPERANDS[0] is the entire list of insns that constitute pop,
17559 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17560 is in the list, UPDATE is true iff the list contains explicit
17561 update of base register. */
17562 void
17563 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17564 bool update)
17566 int i;
17567 char pattern[100];
17568 int offset;
17569 const char *conditional;
17570 int num_saves = XVECLEN (operands[0], 0);
17571 unsigned int regno;
17572 unsigned int regno_base = REGNO (operands[1]);
17574 offset = 0;
17575 offset += update ? 1 : 0;
17576 offset += return_pc ? 1 : 0;
17578 /* Is the base register in the list? */
17579 for (i = offset; i < num_saves; i++)
17581 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17582 /* If SP is in the list, then the base register must be SP. */
17583 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17584 /* If base register is in the list, there must be no explicit update. */
17585 if (regno == regno_base)
17586 gcc_assert (!update);
17589 conditional = reverse ? "%?%D0" : "%?%d0";
17590 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17592 /* Output pop (not stmfd) because it has a shorter encoding. */
17593 gcc_assert (update);
17594 sprintf (pattern, "pop%s\t{", conditional);
17596 else
17598 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17599 It's just a convention; their semantics are identical. */
17600 if (regno_base == SP_REGNUM)
17601 sprintf (pattern, "ldm%sfd\t", conditional);
17602 else if (TARGET_UNIFIED_ASM)
17603 sprintf (pattern, "ldmia%s\t", conditional);
17604 else
17605 sprintf (pattern, "ldm%sia\t", conditional);
17607 strcat (pattern, reg_names[regno_base]);
17608 if (update)
17609 strcat (pattern, "!, {");
17610 else
17611 strcat (pattern, ", {");
17614 /* Output the first destination register. */
17615 strcat (pattern,
17616 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17618 /* Output the rest of the destination registers. */
17619 for (i = offset + 1; i < num_saves; i++)
17621 strcat (pattern, ", ");
17622 strcat (pattern,
17623 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17626 strcat (pattern, "}");
17628 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17629 strcat (pattern, "^");
17631 output_asm_insn (pattern, &cond);
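/* Illustrative sketch, not part of arm.c: the kind of string that
   arm_output_multireg_pop above assembles.  Popping r4, r5 and pc through
   SP with writeback under unified syntax yields "pop%?\t{r4, r5, pc}".
   The helper name and the fixed register list are hypothetical; the real
   routine also folds in the condition code and the interrupt-return '^'.  */
#include <stdio.h>
#include <string.h>

static void
build_pop_example (char *pattern, size_t size)
{
  static const char *const regs[] = { "r4", "r5", "pc" };
  size_t i;

  snprintf (pattern, size, "pop%%?\t{");
  for (i = 0; i < sizeof (regs) / sizeof (regs[0]); i++)
    {
      if (i > 0)
        strcat (pattern, ", ");
      strcat (pattern, regs[i]);
    }
  strcat (pattern, "}");

  /* Usage: char buf[100]; build_pop_example (buf, sizeof buf);
     buf now holds "pop%?\t{r4, r5, pc}".  */
}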
17635 /* Output the assembly for a store multiple. */
17637 const char *
17638 vfp_output_vstmd (rtx * operands)
17640 char pattern[100];
17641 int p;
17642 int base;
17643 int i;
17644 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17645 ? XEXP (operands[0], 0)
17646 : XEXP (XEXP (operands[0], 0), 0);
17647 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17649 if (push_p)
17650 strcpy (pattern, "vpush%?.64\t{%P1");
17651 else
17652 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17654 p = strlen (pattern);
17656 gcc_assert (REG_P (operands[1]));
17658 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17659 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17661 p += sprintf (&pattern[p], ", d%d", base + i);
17663 strcpy (&pattern[p], "}");
17665 output_asm_insn (pattern, operands);
17666 return "";
17670 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17671 number of bytes pushed. */
17673 static int
17674 vfp_emit_fstmd (int base_reg, int count)
17676 rtx par;
17677 rtx dwarf;
17678 rtx tmp, reg;
17679 int i;
17681 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17682 register pairs are stored by a store multiple insn. We avoid this
17683 by pushing an extra pair. */
17684 if (count == 2 && !arm_arch6)
17686 if (base_reg == LAST_VFP_REGNUM - 3)
17687 base_reg -= 2;
17688 count++;
17691 /* FSTMD may not store more than 16 doubleword registers at once. Split
17692 larger stores into multiple parts (up to a maximum of two, in
17693 practice). */
17694 if (count > 16)
17696 int saved;
17697 /* NOTE: base_reg is an internal register number, so each D register
17698 counts as 2. */
17699 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17700 saved += vfp_emit_fstmd (base_reg, 16);
17701 return saved;
17704 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17705 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17707 reg = gen_rtx_REG (DFmode, base_reg);
17708 base_reg += 2;
17710 XVECEXP (par, 0, 0)
17711 = gen_rtx_SET (VOIDmode,
17712 gen_frame_mem
17713 (BLKmode,
17714 gen_rtx_PRE_MODIFY (Pmode,
17715 stack_pointer_rtx,
17716 plus_constant
17717 (Pmode, stack_pointer_rtx,
17718 - (count * 8)))
17720 gen_rtx_UNSPEC (BLKmode,
17721 gen_rtvec (1, reg),
17722 UNSPEC_PUSH_MULT));
17724 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17725 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17726 RTX_FRAME_RELATED_P (tmp) = 1;
17727 XVECEXP (dwarf, 0, 0) = tmp;
17729 tmp = gen_rtx_SET (VOIDmode,
17730 gen_frame_mem (DFmode, stack_pointer_rtx),
17731 reg);
17732 RTX_FRAME_RELATED_P (tmp) = 1;
17733 XVECEXP (dwarf, 0, 1) = tmp;
17735 for (i = 1; i < count; i++)
17737 reg = gen_rtx_REG (DFmode, base_reg);
17738 base_reg += 2;
17739 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17741 tmp = gen_rtx_SET (VOIDmode,
17742 gen_frame_mem (DFmode,
17743 plus_constant (Pmode,
17744 stack_pointer_rtx,
17745 i * 8)),
17746 reg);
17747 RTX_FRAME_RELATED_P (tmp) = 1;
17748 XVECEXP (dwarf, 0, i + 1) = tmp;
17751 par = emit_insn (par);
17752 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17753 RTX_FRAME_RELATED_P (par) = 1;
17755 return count * 8;
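/* Illustrative sketch, not part of arm.c: the byte count produced by
   vfp_emit_fstmd above, mirroring the ARM10 VFPr1 workaround (a run of
   exactly two D-register pairs is padded to three on pre-v6 cores) and
   the split of stores covering more than 16 D registers.  The helper
   name and the PRE_V6 flag are hypothetical.  */
static int
fstmd_bytes (int count, int pre_v6)
{
  if (count == 2 && pre_v6)
    count++;                  /* push an extra pair (bug workaround)  */
  if (count > 16)
    return fstmd_bytes (count - 16, pre_v6) + 16 * 8;
  return count * 8;
}

/* fstmd_bytes (2, 1) == 24 and fstmd_bytes (20, 0) == 160, matching the
   RTL emitted by vfp_emit_fstmd for those register counts.  */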
17758 /* Emit a call instruction with pattern PAT. ADDR is the address of
17759 the call target. */
17761 void
17762 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17764 rtx insn;
17766 insn = emit_call_insn (pat);
17768 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17769 If the call might use such an entry, add a use of the PIC register
17770 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17771 if (TARGET_VXWORKS_RTP
17772 && flag_pic
17773 && !sibcall
17774 && GET_CODE (addr) == SYMBOL_REF
17775 && (SYMBOL_REF_DECL (addr)
17776 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17777 : !SYMBOL_REF_LOCAL_P (addr)))
17779 require_pic_register ();
17780 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17783 if (TARGET_AAPCS_BASED)
17785 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17786 linker. We need to add an IP clobber to allow setting
17787 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17788 is not needed since it's a fixed register. */
17789 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17790 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17794 /* Output a 'call' insn. */
17795 const char *
17796 output_call (rtx *operands)
17798 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17800 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17801 if (REGNO (operands[0]) == LR_REGNUM)
17803 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17804 output_asm_insn ("mov%?\t%0, %|lr", operands);
17807 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17809 if (TARGET_INTERWORK || arm_arch4t)
17810 output_asm_insn ("bx%?\t%0", operands);
17811 else
17812 output_asm_insn ("mov%?\t%|pc, %0", operands);
17814 return "";
17817 /* Output a 'call' insn that is a reference in memory. This is
17818 disabled for ARMv5, where we prefer a blx instead, because otherwise
17819 there's a significant performance overhead. */
17820 const char *
17821 output_call_mem (rtx *operands)
17823 gcc_assert (!arm_arch5);
17824 if (TARGET_INTERWORK)
17826 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17827 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17828 output_asm_insn ("bx%?\t%|ip", operands);
17830 else if (regno_use_in (LR_REGNUM, operands[0]))
17832 /* LR is used in the memory address. We load the address in the
17833 first instruction. It's safe to use IP as the target of the
17834 load since the call will kill it anyway. */
17835 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17836 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17837 if (arm_arch4t)
17838 output_asm_insn ("bx%?\t%|ip", operands);
17839 else
17840 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17842 else
17844 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17845 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17848 return "";
17852 /* Output a move from arm registers to arm registers of a long double
17853 OPERANDS[0] is the destination.
17854 OPERANDS[1] is the source. */
17855 const char *
17856 output_mov_long_double_arm_from_arm (rtx *operands)
17858 /* We have to be careful here because the two might overlap. */
17859 int dest_start = REGNO (operands[0]);
17860 int src_start = REGNO (operands[1]);
17861 rtx ops[2];
17862 int i;
17864 if (dest_start < src_start)
17866 for (i = 0; i < 3; i++)
17868 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17869 ops[1] = gen_rtx_REG (SImode, src_start + i);
17870 output_asm_insn ("mov%?\t%0, %1", ops);
17873 else
17875 for (i = 2; i >= 0; i--)
17877 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17878 ops[1] = gen_rtx_REG (SImode, src_start + i);
17879 output_asm_insn ("mov%?\t%0, %1", ops);
17883 return "";
17886 void
17887 arm_emit_movpair (rtx dest, rtx src)
17889 /* If the src is an immediate, simplify it. */
17890 if (CONST_INT_P (src))
17892 HOST_WIDE_INT val = INTVAL (src);
17893 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17894 if ((val >> 16) & 0x0000ffff)
17895 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17896 GEN_INT (16)),
17897 GEN_INT ((val >> 16) & 0x0000ffff));
17898 return;
17900 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17901 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
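/* Illustrative sketch, not part of arm.c: how a 32-bit constant is split
   into the two 16-bit halves that arm_emit_movpair above materializes
   with a movw/movt-style pair; the high half is skipped when it is zero.
   The helper name is hypothetical.  */
#include <stdint.h>

static void
movpair_halves (uint32_t val, uint16_t *lo, uint16_t *hi)
{
  *lo = val & 0xffffu;              /* first SET: movw-style low half  */
  *hi = (val >> 16) & 0xffffu;      /* ZERO_EXTRACT: movt-style high half  */
}

/* movpair_halves (0x12345678, &lo, &hi) gives lo == 0x5678 and
   hi == 0x1234, i.e. "movw rd, #0x5678" followed by "movt rd, #0x1234".  */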
17904 /* Output a move between double words. It must be REG<-MEM
17905 or MEM<-REG. */
17906 const char *
17907 output_move_double (rtx *operands, bool emit, int *count)
17909 enum rtx_code code0 = GET_CODE (operands[0]);
17910 enum rtx_code code1 = GET_CODE (operands[1]);
17911 rtx otherops[3];
17912 if (count)
17913 *count = 1;
17915 /* The only case when this might happen is when
17916 you are looking at the length of a DImode instruction
17917 that has an invalid constant in it. */
17918 if (code0 == REG && code1 != MEM)
17920 gcc_assert (!emit);
17921 *count = 2;
17922 return "";
17925 if (code0 == REG)
17927 unsigned int reg0 = REGNO (operands[0]);
17929 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17931 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17933 switch (GET_CODE (XEXP (operands[1], 0)))
17935 case REG:
17937 if (emit)
17939 if (TARGET_LDRD
17940 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17941 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17942 else
17943 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17945 break;
17947 case PRE_INC:
17948 gcc_assert (TARGET_LDRD);
17949 if (emit)
17950 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17951 break;
17953 case PRE_DEC:
17954 if (emit)
17956 if (TARGET_LDRD)
17957 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17958 else
17959 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17961 break;
17963 case POST_INC:
17964 if (emit)
17966 if (TARGET_LDRD)
17967 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17968 else
17969 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17971 break;
17973 case POST_DEC:
17974 gcc_assert (TARGET_LDRD);
17975 if (emit)
17976 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17977 break;
17979 case PRE_MODIFY:
17980 case POST_MODIFY:
17981 /* Autoincrement addressing modes should never have overlapping
17982 base and destination registers, and overlapping index registers
17983 are already prohibited, so this doesn't need to worry about
17984 fix_cm3_ldrd. */
17985 otherops[0] = operands[0];
17986 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17987 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17989 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17991 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17993 /* Registers overlap so split out the increment. */
17994 if (emit)
17996 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17997 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17999 if (count)
18000 *count = 2;
18002 else
18004 /* Use a single insn if we can.
18005 FIXME: IWMMXT allows offsets larger than ldrd can
18006 handle, fix these up with a pair of ldr. */
18007 if (TARGET_THUMB2
18008 || !CONST_INT_P (otherops[2])
18009 || (INTVAL (otherops[2]) > -256
18010 && INTVAL (otherops[2]) < 256))
18012 if (emit)
18013 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18015 else
18017 if (emit)
18019 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18022 if (count)
18023 *count = 2;
18028 else
18030 /* Use a single insn if we can.
18031 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18032 fix these up with a pair of ldr. */
18033 if (TARGET_THUMB2
18034 || !CONST_INT_P (otherops[2])
18035 || (INTVAL (otherops[2]) > -256
18036 && INTVAL (otherops[2]) < 256))
18038 if (emit)
18039 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18041 else
18043 if (emit)
18045 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18046 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18048 if (count)
18049 *count = 2;
18052 break;
18054 case LABEL_REF:
18055 case CONST:
18056 /* We might be able to use ldrd %0, %1 here. However the range is
18057 different to ldr/adr, and it is broken on some ARMv7-M
18058 implementations. */
18059 /* Use the second register of the pair to avoid problematic
18060 overlap. */
18061 otherops[1] = operands[1];
18062 if (emit)
18063 output_asm_insn ("adr%?\t%0, %1", otherops);
18064 operands[1] = otherops[0];
18065 if (emit)
18067 if (TARGET_LDRD)
18068 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18069 else
18070 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18073 if (count)
18074 *count = 2;
18075 break;
18077 /* ??? This needs checking for thumb2. */
18078 default:
18079 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18080 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18082 otherops[0] = operands[0];
18083 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18084 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18086 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18088 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18090 switch ((int) INTVAL (otherops[2]))
18092 case -8:
18093 if (emit)
18094 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18095 return "";
18096 case -4:
18097 if (TARGET_THUMB2)
18098 break;
18099 if (emit)
18100 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18101 return "";
18102 case 4:
18103 if (TARGET_THUMB2)
18104 break;
18105 if (emit)
18106 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18107 return "";
18110 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18111 operands[1] = otherops[0];
18112 if (TARGET_LDRD
18113 && (REG_P (otherops[2])
18114 || TARGET_THUMB2
18115 || (CONST_INT_P (otherops[2])
18116 && INTVAL (otherops[2]) > -256
18117 && INTVAL (otherops[2]) < 256)))
18119 if (reg_overlap_mentioned_p (operands[0],
18120 otherops[2]))
18122 rtx tmp;
18123 /* Swap base and index registers over to
18124 avoid a conflict. */
18125 tmp = otherops[1];
18126 otherops[1] = otherops[2];
18127 otherops[2] = tmp;
18129 /* If both registers conflict, it will usually
18130 have been fixed by a splitter. */
18131 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18132 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18134 if (emit)
18136 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18137 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18139 if (count)
18140 *count = 2;
18142 else
18144 otherops[0] = operands[0];
18145 if (emit)
18146 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18148 return "";
18151 if (CONST_INT_P (otherops[2]))
18153 if (emit)
18155 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18156 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18157 else
18158 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18161 else
18163 if (emit)
18164 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18167 else
18169 if (emit)
18170 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18173 if (count)
18174 *count = 2;
18176 if (TARGET_LDRD)
18177 return "ldr%(d%)\t%0, [%1]";
18179 return "ldm%(ia%)\t%1, %M0";
18181 else
18183 otherops[1] = adjust_address (operands[1], SImode, 4);
18184 /* Take care of overlapping base/data reg. */
18185 if (reg_mentioned_p (operands[0], operands[1]))
18187 if (emit)
18189 output_asm_insn ("ldr%?\t%0, %1", otherops);
18190 output_asm_insn ("ldr%?\t%0, %1", operands);
18192 if (count)
18193 *count = 2;
18196 else
18198 if (emit)
18200 output_asm_insn ("ldr%?\t%0, %1", operands);
18201 output_asm_insn ("ldr%?\t%0, %1", otherops);
18203 if (count)
18204 *count = 2;
18209 else
18211 /* Constraints should ensure this. */
18212 gcc_assert (code0 == MEM && code1 == REG);
18213 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18214 || (TARGET_ARM && TARGET_LDRD));
18216 switch (GET_CODE (XEXP (operands[0], 0)))
18218 case REG:
18219 if (emit)
18221 if (TARGET_LDRD)
18222 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18223 else
18224 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18226 break;
18228 case PRE_INC:
18229 gcc_assert (TARGET_LDRD);
18230 if (emit)
18231 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18232 break;
18234 case PRE_DEC:
18235 if (emit)
18237 if (TARGET_LDRD)
18238 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18239 else
18240 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18242 break;
18244 case POST_INC:
18245 if (emit)
18247 if (TARGET_LDRD)
18248 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18249 else
18250 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18252 break;
18254 case POST_DEC:
18255 gcc_assert (TARGET_LDRD);
18256 if (emit)
18257 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18258 break;
18260 case PRE_MODIFY:
18261 case POST_MODIFY:
18262 otherops[0] = operands[1];
18263 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18264 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18266 /* IWMMXT allows offsets larger than ldrd can handle,
18267 fix these up with a pair of ldr. */
18268 if (!TARGET_THUMB2
18269 && CONST_INT_P (otherops[2])
18270 && (INTVAL(otherops[2]) <= -256
18271 || INTVAL(otherops[2]) >= 256))
18273 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18275 if (emit)
18277 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18278 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18280 if (count)
18281 *count = 2;
18283 else
18285 if (emit)
18287 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18288 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18290 if (count)
18291 *count = 2;
18294 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18296 if (emit)
18297 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18299 else
18301 if (emit)
18302 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18304 break;
18306 case PLUS:
18307 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18308 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18310 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18312 case -8:
18313 if (emit)
18314 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18315 return "";
18317 case -4:
18318 if (TARGET_THUMB2)
18319 break;
18320 if (emit)
18321 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18322 return "";
18324 case 4:
18325 if (TARGET_THUMB2)
18326 break;
18327 if (emit)
18328 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18329 return "";
18332 if (TARGET_LDRD
18333 && (REG_P (otherops[2])
18334 || TARGET_THUMB2
18335 || (CONST_INT_P (otherops[2])
18336 && INTVAL (otherops[2]) > -256
18337 && INTVAL (otherops[2]) < 256)))
18339 otherops[0] = operands[1];
18340 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18341 if (emit)
18342 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18343 return "";
18345 /* Fall through */
18347 default:
18348 otherops[0] = adjust_address (operands[0], SImode, 4);
18349 otherops[1] = operands[1];
18350 if (emit)
18352 output_asm_insn ("str%?\t%1, %0", operands);
18353 output_asm_insn ("str%?\t%H1, %0", otherops);
18355 if (count)
18356 *count = 2;
18360 return "";
18363 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18364 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18366 const char *
18367 output_move_quad (rtx *operands)
18369 if (REG_P (operands[0]))
18371 /* Load, or reg->reg move. */
18373 if (MEM_P (operands[1]))
18375 switch (GET_CODE (XEXP (operands[1], 0)))
18377 case REG:
18378 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18379 break;
18381 case LABEL_REF:
18382 case CONST:
18383 output_asm_insn ("adr%?\t%0, %1", operands);
18384 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18385 break;
18387 default:
18388 gcc_unreachable ();
18391 else
18393 rtx ops[2];
18394 int dest, src, i;
18396 gcc_assert (REG_P (operands[1]));
18398 dest = REGNO (operands[0]);
18399 src = REGNO (operands[1]);
18401 /* This seems pretty dumb, but hopefully GCC won't try to do it
18402 very often. */
18403 if (dest < src)
18404 for (i = 0; i < 4; i++)
18406 ops[0] = gen_rtx_REG (SImode, dest + i);
18407 ops[1] = gen_rtx_REG (SImode, src + i);
18408 output_asm_insn ("mov%?\t%0, %1", ops);
18410 else
18411 for (i = 3; i >= 0; i--)
18413 ops[0] = gen_rtx_REG (SImode, dest + i);
18414 ops[1] = gen_rtx_REG (SImode, src + i);
18415 output_asm_insn ("mov%?\t%0, %1", ops);
18419 else
18421 gcc_assert (MEM_P (operands[0]));
18422 gcc_assert (REG_P (operands[1]));
18423 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18425 switch (GET_CODE (XEXP (operands[0], 0)))
18427 case REG:
18428 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18429 break;
18431 default:
18432 gcc_unreachable ();
18436 return "";
18439 /* Output a VFP load or store instruction. */
18441 const char *
18442 output_move_vfp (rtx *operands)
18444 rtx reg, mem, addr, ops[2];
18445 int load = REG_P (operands[0]);
18446 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18447 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18448 const char *templ;
18449 char buff[50];
18450 machine_mode mode;
18452 reg = operands[!load];
18453 mem = operands[load];
18455 mode = GET_MODE (reg);
18457 gcc_assert (REG_P (reg));
18458 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18459 gcc_assert (mode == SFmode
18460 || mode == DFmode
18461 || mode == SImode
18462 || mode == DImode
18463 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18464 gcc_assert (MEM_P (mem));
18466 addr = XEXP (mem, 0);
18468 switch (GET_CODE (addr))
18470 case PRE_DEC:
18471 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18472 ops[0] = XEXP (addr, 0);
18473 ops[1] = reg;
18474 break;
18476 case POST_INC:
18477 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18478 ops[0] = XEXP (addr, 0);
18479 ops[1] = reg;
18480 break;
18482 default:
18483 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18484 ops[0] = reg;
18485 ops[1] = mem;
18486 break;
18489 sprintf (buff, templ,
18490 load ? "ld" : "st",
18491 dp ? "64" : "32",
18492 dp ? "P" : "",
18493 integer_p ? "\t%@ int" : "");
18494 output_asm_insn (buff, ops);
18496 return "";
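/* Illustrative sketch, not part of arm.c: how the template string in
   output_move_vfp above expands.  For a DFmode load with a plain
   register or register-plus-offset address, the default case builds
   "vldr%?.64" followed by a tab and "%P0, %1".  */
#include <stdio.h>

static void
show_vfp_load_template (void)
{
  char buff[50];
  const char *templ = "v%sr%%?.%s\t%%%s0, %%1%s";
  int load = 1, dp = 1, integer_p = 0;

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : "32",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  puts (buff);    /* prints: vldr%?.64<TAB>%P0, %1  */
}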
18499 /* Output a Neon double-word or quad-word load or store, or a load
18500 or store for larger structure modes.
18502 WARNING: The ordering of elements is weird in big-endian mode,
18503 because the EABI requires that vectors stored in memory appear
18504 as though they were stored by a VSTM instruction.
18505 GCC RTL defines element ordering based on in-memory order.
18506 This can be different from the architectural ordering of elements
18507 within a NEON register. The intrinsics defined in arm_neon.h use the
18508 NEON register element ordering, not the GCC RTL element ordering.
18510 For example, the in-memory ordering of a big-endian quadword
18511 vector with 16-bit elements when stored from register pair {d0,d1}
18512 will be (lowest address first, d0[N] is NEON register element N):
18514 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18516 When necessary, quadword registers (dN, dN+1) are moved to ARM
18517 registers from rN in the order:
18519 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18521 So that STM/LDM can be used on vectors in ARM registers, and the
18522 same memory layout will result as if VSTM/VLDM were used.
18524 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18525 possible, which allows use of appropriate alignment tags.
18526 Note that the choice of "64" is independent of the actual vector
18527 element size; this size simply ensures that the behavior is
18528 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18530 Due to limitations of those instructions, use of VST1.64/VLD1.64
18531 is not possible if:
18532 - the address contains PRE_DEC, or
18533 - the mode refers to more than 4 double-word registers
18535 In those cases, it would be possible to replace VSTM/VLDM by a
18536 sequence of instructions; this is not currently implemented since
18537 this is not certain to actually improve performance. */
18539 const char *
18540 output_move_neon (rtx *operands)
18542 rtx reg, mem, addr, ops[2];
18543 int regno, nregs, load = REG_P (operands[0]);
18544 const char *templ;
18545 char buff[50];
18546 machine_mode mode;
18548 reg = operands[!load];
18549 mem = operands[load];
18551 mode = GET_MODE (reg);
18553 gcc_assert (REG_P (reg));
18554 regno = REGNO (reg);
18555 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18556 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18557 || NEON_REGNO_OK_FOR_QUAD (regno));
18558 gcc_assert (VALID_NEON_DREG_MODE (mode)
18559 || VALID_NEON_QREG_MODE (mode)
18560 || VALID_NEON_STRUCT_MODE (mode));
18561 gcc_assert (MEM_P (mem));
18563 addr = XEXP (mem, 0);
18565 /* Strip off const from addresses like (const (plus (...))). */
18566 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18567 addr = XEXP (addr, 0);
18569 switch (GET_CODE (addr))
18571 case POST_INC:
18572 /* We have to use vldm / vstm for too-large modes. */
18573 if (nregs > 4)
18575 templ = "v%smia%%?\t%%0!, %%h1";
18576 ops[0] = XEXP (addr, 0);
18578 else
18580 templ = "v%s1.64\t%%h1, %%A0";
18581 ops[0] = mem;
18583 ops[1] = reg;
18584 break;
18586 case PRE_DEC:
18587 /* We have to use vldm / vstm in this case, since there is no
18588 pre-decrement form of the vld1 / vst1 instructions. */
18589 templ = "v%smdb%%?\t%%0!, %%h1";
18590 ops[0] = XEXP (addr, 0);
18591 ops[1] = reg;
18592 break;
18594 case POST_MODIFY:
18595 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18596 gcc_unreachable ();
18598 case REG:
18599 /* We have to use vldm / vstm for too-large modes. */
18600 if (nregs > 1)
18602 if (nregs > 4)
18603 templ = "v%smia%%?\t%%m0, %%h1";
18604 else
18605 templ = "v%s1.64\t%%h1, %%A0";
18607 ops[0] = mem;
18608 ops[1] = reg;
18609 break;
18611 /* Fall through. */
18612 case LABEL_REF:
18613 case PLUS:
18615 int i;
18616 int overlap = -1;
18617 for (i = 0; i < nregs; i++)
18619 /* We're only using DImode here because it's a convenient size. */
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18621 ops[1] = adjust_address (mem, DImode, 8 * i);
18622 if (reg_overlap_mentioned_p (ops[0], mem))
18624 gcc_assert (overlap == -1);
18625 overlap = i;
18627 else
18629 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18630 output_asm_insn (buff, ops);
18633 if (overlap != -1)
18635 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18636 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18637 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18638 output_asm_insn (buff, ops);
18641 return "";
18644 default:
18645 gcc_unreachable ();
18648 sprintf (buff, templ, load ? "ld" : "st");
18649 output_asm_insn (buff, ops);
18651 return "";
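/* Illustrative sketch, not part of arm.c: the big-endian in-memory lane
   order described in the comment before output_move_neon above, for a
   quadword of 16-bit elements stored from the register pair {d0, d1}.
   Lane numbers follow the NEON register numbering used in that comment.  */
#include <stdio.h>

static void
print_bigendian_q16_layout (void)
{
  int dreg, lane;

  for (dreg = 0; dreg < 2; dreg++)
    for (lane = 3; lane >= 0; lane--)
      printf ("d%d[%d] ", dreg, dreg * 4 + lane);
  putchar ('\n');
  /* prints: d0[3] d0[2] d0[1] d0[0] d1[7] d1[6] d1[5] d1[4]  */
}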
18654 /* Compute and return the length of neon_mov<mode>, where <mode> is
18655 one of VSTRUCT modes: EI, OI, CI or XI. */
18657 arm_attr_length_move_neon (rtx_insn *insn)
18659 rtx reg, mem, addr;
18660 int load;
18661 machine_mode mode;
18663 extract_insn_cached (insn);
18665 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18667 mode = GET_MODE (recog_data.operand[0]);
18668 switch (mode)
18670 case EImode:
18671 case OImode:
18672 return 8;
18673 case CImode:
18674 return 12;
18675 case XImode:
18676 return 16;
18677 default:
18678 gcc_unreachable ();
18682 load = REG_P (recog_data.operand[0]);
18683 reg = recog_data.operand[!load];
18684 mem = recog_data.operand[load];
18686 gcc_assert (MEM_P (mem));
18688 mode = GET_MODE (reg);
18689 addr = XEXP (mem, 0);
18691 /* Strip off const from addresses like (const (plus (...))). */
18692 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18693 addr = XEXP (addr, 0);
18695 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18697 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18698 return insns * 4;
18700 else
18701 return 4;
18704 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18705 return zero. */
18708 arm_address_offset_is_imm (rtx_insn *insn)
18710 rtx mem, addr;
18712 extract_insn_cached (insn);
18714 if (REG_P (recog_data.operand[0]))
18715 return 0;
18717 mem = recog_data.operand[0];
18719 gcc_assert (MEM_P (mem));
18721 addr = XEXP (mem, 0);
18723 if (REG_P (addr)
18724 || (GET_CODE (addr) == PLUS
18725 && REG_P (XEXP (addr, 0))
18726 && CONST_INT_P (XEXP (addr, 1))))
18727 return 1;
18728 else
18729 return 0;
18732 /* Output an ADD r, s, #n where n may be too big for one instruction.
18733 If adding zero to one register, output nothing. */
18734 const char *
18735 output_add_immediate (rtx *operands)
18737 HOST_WIDE_INT n = INTVAL (operands[2]);
18739 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18741 if (n < 0)
18742 output_multi_immediate (operands,
18743 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18744 -n);
18745 else
18746 output_multi_immediate (operands,
18747 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18751 return "";
18754 /* Output a multiple immediate operation.
18755 OPERANDS is the vector of operands referred to in the output patterns.
18756 INSTR1 is the output pattern to use for the first constant.
18757 INSTR2 is the output pattern to use for subsequent constants.
18758 IMMED_OP is the index of the constant slot in OPERANDS.
18759 N is the constant value. */
18760 static const char *
18761 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18762 int immed_op, HOST_WIDE_INT n)
18764 #if HOST_BITS_PER_WIDE_INT > 32
18765 n &= 0xffffffff;
18766 #endif
18768 if (n == 0)
18770 /* Quick and easy output. */
18771 operands[immed_op] = const0_rtx;
18772 output_asm_insn (instr1, operands);
18774 else
18776 int i;
18777 const char * instr = instr1;
18779 /* Note that n is never zero here (which would give no output). */
18780 for (i = 0; i < 32; i += 2)
18782 if (n & (3 << i))
18784 operands[immed_op] = GEN_INT (n & (255 << i));
18785 output_asm_insn (instr, operands);
18786 instr = instr2;
18787 i += 6;
18792 return "";
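/* Illustrative sketch, not part of arm.c: the chunking loop used by
   output_multi_immediate above.  Each pass peels off an 8-bit field that
   starts at an even bit position, which is what one ARM add/sub
   immediate can encode.  The helper name is hypothetical.  */
#include <stdio.h>

static void
print_immediate_chunks (unsigned long n)
{
  int i;

  n &= 0xffffffffUL;
  for (i = 0; i < 32; i += 2)
    if (n & (3UL << i))
      {
        printf ("chunk: 0x%08lx\n", n & (255UL << i));
        i += 6;    /* skip the rest of this byte, as the loop above does  */
      }
}

/* print_immediate_chunks (0x10004) prints 0x00000004 and then 0x00010000,
   matching the two instructions output_add_immediate would emit for
   "add rd, rn, #0x10004".  */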
18795 /* Return the name of a shifter operation. */
18796 static const char *
18797 arm_shift_nmem(enum rtx_code code)
18799 switch (code)
18801 case ASHIFT:
18802 return ARM_LSL_NAME;
18804 case ASHIFTRT:
18805 return "asr";
18807 case LSHIFTRT:
18808 return "lsr";
18810 case ROTATERT:
18811 return "ror";
18813 default:
18814 abort();
18818 /* Return the appropriate ARM instruction for the operation code.
18819 The returned result should not be overwritten. OP is the rtx of the
18820 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18821 was shifted. */
18822 const char *
18823 arithmetic_instr (rtx op, int shift_first_arg)
18825 switch (GET_CODE (op))
18827 case PLUS:
18828 return "add";
18830 case MINUS:
18831 return shift_first_arg ? "rsb" : "sub";
18833 case IOR:
18834 return "orr";
18836 case XOR:
18837 return "eor";
18839 case AND:
18840 return "and";
18842 case ASHIFT:
18843 case ASHIFTRT:
18844 case LSHIFTRT:
18845 case ROTATERT:
18846 return arm_shift_nmem(GET_CODE(op));
18848 default:
18849 gcc_unreachable ();
18853 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18854 for the operation code. The returned result should not be overwritten.
18855 OP is the rtx code of the shift.
18856 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18857 constant shift amount otherwise. */
18858 static const char *
18859 shift_op (rtx op, HOST_WIDE_INT *amountp)
18861 const char * mnem;
18862 enum rtx_code code = GET_CODE (op);
18864 switch (code)
18866 case ROTATE:
18867 if (!CONST_INT_P (XEXP (op, 1)))
18869 output_operand_lossage ("invalid shift operand");
18870 return NULL;
18873 code = ROTATERT;
18874 *amountp = 32 - INTVAL (XEXP (op, 1));
18875 mnem = "ror";
18876 break;
18878 case ASHIFT:
18879 case ASHIFTRT:
18880 case LSHIFTRT:
18881 case ROTATERT:
18882 mnem = arm_shift_nmem(code);
18883 if (CONST_INT_P (XEXP (op, 1)))
18885 *amountp = INTVAL (XEXP (op, 1));
18887 else if (REG_P (XEXP (op, 1)))
18889 *amountp = -1;
18890 return mnem;
18892 else
18894 output_operand_lossage ("invalid shift operand");
18895 return NULL;
18897 break;
18899 case MULT:
18900 /* We never have to worry about the amount being other than a
18901 power of 2, since this case can never be reloaded from a reg. */
18902 if (!CONST_INT_P (XEXP (op, 1)))
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18910 /* Amount must be a power of two. */
18911 if (*amountp & (*amountp - 1))
18913 output_operand_lossage ("invalid shift operand");
18914 return NULL;
18917 *amountp = int_log2 (*amountp);
18918 return ARM_LSL_NAME;
18920 default:
18921 output_operand_lossage ("invalid shift operand");
18922 return NULL;
18925 /* This is not 100% correct, but follows from the desire to merge
18926 multiplication by a power of 2 with the recognizer for a
18927 shift. >=32 is not a valid shift for "lsl", so we must try to
18928 output a shift that produces the correct arithmetical result.
18929 Using lsr #32 is identical except for the fact that the carry bit
18930 is not set correctly if we set the flags; but we never use the
18931 carry bit from such an operation, so we can ignore that. */
18932 if (code == ROTATERT)
18933 /* Rotate is just modulo 32. */
18934 *amountp &= 31;
18935 else if (*amountp != (*amountp & 31))
18937 if (code == ASHIFT)
18938 mnem = "lsr";
18939 *amountp = 32;
18942 /* Shifts of 0 are no-ops. */
18943 if (*amountp == 0)
18944 return NULL;
18946 return mnem;
18949 /* Obtain the shift from the POWER of two. */
18951 static HOST_WIDE_INT
18952 int_log2 (HOST_WIDE_INT power)
18954 HOST_WIDE_INT shift = 0;
18956 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18958 gcc_assert (shift <= 31);
18959 shift++;
18962 return shift;
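/* Illustrative sketch, not part of arm.c: the power-of-two test and log2
   computation that shift_op and int_log2 above use when a MULT by a
   power of two is printed as ARM_LSL_NAME.  The helper name is
   hypothetical.  */
static int
mult_to_lsl_amount (unsigned long amount)
{
  int shift = 0;

  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;                        /* not a power of two: rejected  */
  while (((1UL << shift) & amount) == 0)
    shift++;
  return shift;
}

/* mult_to_lsl_amount (8) == 3, so (mult x 8) is printed as "lsl" with a
   shift amount of 3; mult_to_lsl_amount (10) == -1.  */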
18965 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18966 because /bin/as is horribly restrictive. The judgement about
18967 whether or not each character is 'printable' (and can be output as
18968 is) or not (and must be printed with an octal escape) must be made
18969 with reference to the *host* character set -- the situation is
18970 similar to that discussed in the comments above pp_c_char in
18971 c-pretty-print.c. */
18973 #define MAX_ASCII_LEN 51
18975 void
18976 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18978 int i;
18979 int len_so_far = 0;
18981 fputs ("\t.ascii\t\"", stream);
18983 for (i = 0; i < len; i++)
18985 int c = p[i];
18987 if (len_so_far >= MAX_ASCII_LEN)
18989 fputs ("\"\n\t.ascii\t\"", stream);
18990 len_so_far = 0;
18993 if (ISPRINT (c))
18995 if (c == '\\' || c == '\"')
18997 putc ('\\', stream);
18998 len_so_far++;
19000 putc (c, stream);
19001 len_so_far++;
19003 else
19005 fprintf (stream, "\\%03o", c);
19006 len_so_far += 4;
19010 fputs ("\"\n", stream);
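/* Illustrative sketch, not part of arm.c: the per-character escaping rule
   used by output_ascii_pseudo_op above.  Printable characters pass
   through, with backslash and double-quote escaped; everything else is
   emitted as a three-digit octal escape.  The helper name is
   hypothetical, and isprint stands in for the host ISPRINT macro.  */
#include <ctype.h>
#include <stdio.h>

static void
emit_ascii_char (FILE *stream, unsigned char c)
{
  if (isprint (c))
    {
      if (c == '\\' || c == '\"')
        putc ('\\', stream);
      putc (c, stream);
    }
  else
    fprintf (stream, "\\%03o", c);
}

/* For the bytes 'a', '"' and '\n' this emits: a\"\012  */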
19013 /* Compute the register save mask for registers 0 through 12
19014 inclusive. This code is used by arm_compute_save_reg_mask. */
19016 static unsigned long
19017 arm_compute_save_reg0_reg12_mask (void)
19019 unsigned long func_type = arm_current_func_type ();
19020 unsigned long save_reg_mask = 0;
19021 unsigned int reg;
19023 if (IS_INTERRUPT (func_type))
19025 unsigned int max_reg;
19026 /* Interrupt functions must not corrupt any registers,
19027 even call clobbered ones. If this is a leaf function
19028 we can just examine the registers used by the RTL, but
19029 otherwise we have to assume that whatever function is
19030 called might clobber anything, and so we have to save
19031 all the call-clobbered registers as well. */
19032 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19033 /* FIQ handlers have registers r8 - r12 banked, so
19034 we only need to check r0 - r7. Normal ISRs only
19035 bank r14 and r15, so we must check up to r12.
19036 r13 is the stack pointer which is always preserved,
19037 so we do not need to consider it here. */
19038 max_reg = 7;
19039 else
19040 max_reg = 12;
19042 for (reg = 0; reg <= max_reg; reg++)
19043 if (df_regs_ever_live_p (reg)
19044 || (! crtl->is_leaf && call_used_regs[reg]))
19045 save_reg_mask |= (1 << reg);
19047 /* Also save the pic base register if necessary. */
19048 if (flag_pic
19049 && !TARGET_SINGLE_PIC_BASE
19050 && arm_pic_register != INVALID_REGNUM
19051 && crtl->uses_pic_offset_table)
19052 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19054 else if (IS_VOLATILE(func_type))
19056 /* For noreturn functions we historically omitted register saves
19057 altogether. However, this really messes up debugging. As a
19058 compromise, save just the frame pointers. Combined with the link
19059 register saved elsewhere this should be sufficient to get
19060 a backtrace. */
19061 if (frame_pointer_needed)
19062 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19063 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19064 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19065 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19066 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19068 else
19070 /* In the normal case we only need to save those registers
19071 which are call saved and which are used by this function. */
19072 for (reg = 0; reg <= 11; reg++)
19073 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19074 save_reg_mask |= (1 << reg);
19076 /* Handle the frame pointer as a special case. */
19077 if (frame_pointer_needed)
19078 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19080 /* If we aren't loading the PIC register,
19081 don't stack it even though it may be live. */
19082 if (flag_pic
19083 && !TARGET_SINGLE_PIC_BASE
19084 && arm_pic_register != INVALID_REGNUM
19085 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19086 || crtl->uses_pic_offset_table))
19087 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19089 /* The prologue will copy SP into R0, so save it. */
19090 if (IS_STACKALIGN (func_type))
19091 save_reg_mask |= 1;
19094 /* Save registers so the exception handler can modify them. */
19095 if (crtl->calls_eh_return)
19097 unsigned int i;
19099 for (i = 0; ; i++)
19101 reg = EH_RETURN_DATA_REGNO (i);
19102 if (reg == INVALID_REGNUM)
19103 break;
19104 save_reg_mask |= 1 << reg;
19108 return save_reg_mask;
19111 /* Return true if r3 is live at the start of the function. */
19113 static bool
19114 arm_r3_live_at_start_p (void)
19116 /* Just look at cfg info, which is still close enough to correct at this
19117 point. This gives false positives for broken functions that might use
19118 uninitialized data that happens to be allocated in r3, but who cares? */
19119 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19122 /* Compute the number of bytes used to store the static chain register on the
19123 stack, above the stack frame. We need to know this accurately to get the
19124 alignment of the rest of the stack frame correct. */
19126 static int
19127 arm_compute_static_chain_stack_bytes (void)
19129 /* See the defining assertion in arm_expand_prologue. */
19130 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19131 && IS_NESTED (arm_current_func_type ())
19132 && arm_r3_live_at_start_p ()
19133 && crtl->args.pretend_args_size == 0)
19134 return 4;
19136 return 0;
19139 /* Compute a bit mask of which registers need to be
19140 saved on the stack for the current function.
19141 This is used by arm_get_frame_offsets, which may add extra registers. */
19143 static unsigned long
19144 arm_compute_save_reg_mask (void)
19146 unsigned int save_reg_mask = 0;
19147 unsigned long func_type = arm_current_func_type ();
19148 unsigned int reg;
19150 if (IS_NAKED (func_type))
19151 /* This should never really happen. */
19152 return 0;
19154 /* If we are creating a stack frame, then we must save the frame pointer,
19155 IP (which will hold the old stack pointer), LR and the PC. */
19156 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19157 save_reg_mask |=
19158 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19159 | (1 << IP_REGNUM)
19160 | (1 << LR_REGNUM)
19161 | (1 << PC_REGNUM);
19163 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19165 /* Decide if we need to save the link register.
19166 Interrupt routines have their own banked link register,
19167 so they never need to save it.
19168 Otherwise if we do not use the link register we do not need to save
19169 it. If we are pushing other registers onto the stack however, we
19170 can save an instruction in the epilogue by pushing the link register
19171 now and then popping it back into the PC. This incurs extra memory
19172 accesses though, so we only do it when optimizing for size, and only
19173 if we know that we will not need a fancy return sequence. */
19174 if (df_regs_ever_live_p (LR_REGNUM)
19175 || (save_reg_mask
19176 && optimize_size
19177 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19178 && !crtl->calls_eh_return))
19179 save_reg_mask |= 1 << LR_REGNUM;
19181 if (cfun->machine->lr_save_eliminated)
19182 save_reg_mask &= ~ (1 << LR_REGNUM);
19184 if (TARGET_REALLY_IWMMXT
19185 && ((bit_count (save_reg_mask)
19186 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19187 arm_compute_static_chain_stack_bytes())
19188 ) % 2) != 0)
19190 /* The total number of registers that are going to be pushed
19191 onto the stack is odd. We need to ensure that the stack
19192 is 64-bit aligned before we start to save iWMMXt registers,
19193 and also before we start to create locals. (A local variable
19194 might be a double or long long which we will load/store using
19195 an iWMMXt instruction). Therefore we need to push another
19196 ARM register, so that the stack will be 64-bit aligned. We
19197 try to avoid using the arg registers (r0 - r3) as they might be
19198 used to pass values in a tail call. */
19199 for (reg = 4; reg <= 12; reg++)
19200 if ((save_reg_mask & (1 << reg)) == 0)
19201 break;
19203 if (reg <= 12)
19204 save_reg_mask |= (1 << reg);
19205 else
19207 cfun->machine->sibcall_blocked = 1;
19208 save_reg_mask |= (1 << 3);
19212 /* We may need to push an additional register for use initializing the
19213 PIC base register. */
19214 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19215 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19217 reg = thumb_find_work_register (1 << 4);
19218 if (!call_used_regs[reg])
19219 save_reg_mask |= (1 << reg);
19222 return save_reg_mask;
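/* Illustrative sketch, not part of arm.c: the parity test used above to
   keep the stack 64-bit aligned before iWMMXt register saves.  If the
   number of words already being pushed is odd, one more ARM register is
   added to the mask.  The helper name is hypothetical and
   __builtin_popcountl stands in for bit_count.  */
static int
needs_alignment_pad (unsigned long save_mask, int pretend_words)
{
  return (__builtin_popcountl (save_mask) + pretend_words) % 2 != 0;
}

/* Pushing r4 and lr (two words) needs no pad, but adding r5 makes the
   count odd:
     needs_alignment_pad ((1UL << 4) | (1UL << 14), 0) == 0
     needs_alignment_pad ((1UL << 4) | (1UL << 5) | (1UL << 14), 0) == 1  */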
19226 /* Compute a bit mask of which registers need to be
19227 saved on the stack for the current function. */
19228 static unsigned long
19229 thumb1_compute_save_reg_mask (void)
19231 unsigned long mask;
19232 unsigned reg;
19234 mask = 0;
19235 for (reg = 0; reg < 12; reg ++)
19236 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19237 mask |= 1 << reg;
19239 if (flag_pic
19240 && !TARGET_SINGLE_PIC_BASE
19241 && arm_pic_register != INVALID_REGNUM
19242 && crtl->uses_pic_offset_table)
19243 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19245 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19246 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19247 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19249 /* LR will also be pushed if any lo regs are pushed. */
19250 if (mask & 0xff || thumb_force_lr_save ())
19251 mask |= (1 << LR_REGNUM);
19253 /* Make sure we have a low work register if we need one.
19254 We will need one if we are going to push a high register,
19255 but we are not currently intending to push a low register. */
19256 if ((mask & 0xff) == 0
19257 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19259 /* Use thumb_find_work_register to choose which register
19260 we will use. If the register is live then we will
19261 have to push it. Use LAST_LO_REGNUM as our fallback
19262 choice for the register to select. */
19263 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19264 /* Make sure the register returned by thumb_find_work_register is
19265 not part of the return value. */
19266 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19267 reg = LAST_LO_REGNUM;
19269 if (! call_used_regs[reg])
19270 mask |= 1 << reg;
19273 /* The 504 below is 8 bytes less than 512 because there are two possible
19274 alignment words. We can't tell here if they will be present or not, so we
19275 have to play it safe and assume that they are. */
19276 if ((CALLER_INTERWORKING_SLOT_SIZE +
19277 ROUND_UP_WORD (get_frame_size ()) +
19278 crtl->outgoing_args_size) >= 504)
19280 /* This is the same as the code in thumb1_expand_prologue() which
19281 determines which register to use for stack decrement. */
19282 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19283 if (mask & (1 << reg))
19284 break;
19286 if (reg > LAST_LO_REGNUM)
19288 /* Make sure we have a register available for stack decrement. */
19289 mask |= 1 << LAST_LO_REGNUM;
19293 return mask;
19297 /* Return the number of bytes required to save VFP registers. */
19298 static int
19299 arm_get_vfp_saved_size (void)
19301 unsigned int regno;
19302 int count;
19303 int saved;
19305 saved = 0;
19306 /* Space for saved VFP registers. */
19307 if (TARGET_HARD_FLOAT && TARGET_VFP)
19309 count = 0;
19310 for (regno = FIRST_VFP_REGNUM;
19311 regno < LAST_VFP_REGNUM;
19312 regno += 2)
19314 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19315 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19317 if (count > 0)
19319 /* Workaround ARM10 VFPr1 bug. */
19320 if (count == 2 && !arm_arch6)
19321 count++;
19322 saved += count * 8;
19324 count = 0;
19326 else
19327 count++;
19329 if (count > 0)
19331 if (count == 2 && !arm_arch6)
19332 count++;
19333 saved += count * 8;
19336 return saved;
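/* Illustrative sketch, not part of arm.c: the run-length accounting in
   arm_get_vfp_saved_size above.  live[] marks the call-saved D registers
   that need spilling; each contiguous run of COUNT registers costs
   COUNT * 8 bytes, with one register of padding for the ARM10 VFPr1
   case.  The helper name, the live[] array and the PRE_V6 flag are
   hypothetical.  */
static int
vfp_saved_size_example (const int *live, int nregs, int pre_v6)
{
  int i, count = 0, saved = 0;

  for (i = 0; i < nregs; i++)
    {
      if (live[i])
        count++;
      else if (count > 0)
        {
          if (count == 2 && pre_v6)
            count++;
          saved += count * 8;
          count = 0;
        }
    }
  if (count > 0)
    {
      if (count == 2 && pre_v6)
        count++;
      saved += count * 8;
    }
  return saved;
}

/* With only d8 and d9 live on a pre-v6 core this returns 24 (an extra
   pair is pushed because of the workaround); with d8..d11 live it
   returns 32.  */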
19340 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19341 everything bar the final return instruction. If simple_return is true,
19342 then do not output epilogue, because it has already been emitted in RTL. */
19343 const char *
19344 output_return_instruction (rtx operand, bool really_return, bool reverse,
19345 bool simple_return)
19347 char conditional[10];
19348 char instr[100];
19349 unsigned reg;
19350 unsigned long live_regs_mask;
19351 unsigned long func_type;
19352 arm_stack_offsets *offsets;
19354 func_type = arm_current_func_type ();
19356 if (IS_NAKED (func_type))
19357 return "";
19359 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19361 /* If this function was declared non-returning, and we have
19362 found a tail call, then we have to trust that the called
19363 function won't return. */
19364 if (really_return)
19366 rtx ops[2];
19368 /* Otherwise, trap an attempted return by aborting. */
19369 ops[0] = operand;
19370 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19371 : "abort");
19372 assemble_external_libcall (ops[1]);
19373 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19376 return "";
19379 gcc_assert (!cfun->calls_alloca || really_return);
19381 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19383 cfun->machine->return_used_this_function = 1;
19385 offsets = arm_get_frame_offsets ();
19386 live_regs_mask = offsets->saved_regs_mask;
19388 if (!simple_return && live_regs_mask)
19390 const char * return_reg;
19392 /* If we do not have any special requirements for function exit
19393 (e.g. interworking) then we can load the return address
19394 directly into the PC. Otherwise we must load it into LR. */
19395 if (really_return
19396 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19397 return_reg = reg_names[PC_REGNUM];
19398 else
19399 return_reg = reg_names[LR_REGNUM];
19401 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19403 /* There are three possible reasons for the IP register
19404 being saved. 1) a stack frame was created, in which case
19405 IP contains the old stack pointer, or 2) an ISR routine
19406 corrupted it, or 3) it was saved to align the stack on
19407 iWMMXt. In case 1, restore IP into SP, otherwise just
19408 restore IP. */
19409 if (frame_pointer_needed)
19411 live_regs_mask &= ~ (1 << IP_REGNUM);
19412 live_regs_mask |= (1 << SP_REGNUM);
19414 else
19415 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19418 /* On some ARM architectures it is faster to use LDR rather than
19419 LDM to load a single register. On other architectures, the
19420 cost is the same. In 26 bit mode, or for exception handlers,
19421 we have to use LDM to load the PC so that the CPSR is also
19422 restored. */
19423 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19424 if (live_regs_mask == (1U << reg))
19425 break;
19427 if (reg <= LAST_ARM_REGNUM
19428 && (reg != LR_REGNUM
19429 || ! really_return
19430 || ! IS_INTERRUPT (func_type)))
19432 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19433 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19435 else
19437 char *p;
19438 int first = 1;
19440 /* Generate the load multiple instruction to restore the
19441 registers. Note we can get here, even if
19442 frame_pointer_needed is true, but only if sp already
19443 points to the base of the saved core registers. */
19444 if (live_regs_mask & (1 << SP_REGNUM))
19446 unsigned HOST_WIDE_INT stack_adjust;
19448 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19449 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19451 if (stack_adjust && arm_arch5 && TARGET_ARM)
19452 if (TARGET_UNIFIED_ASM)
19453 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19454 else
19455 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19456 else
19458 /* If we can't use ldmib (SA110 bug),
19459 then try to pop r3 instead. */
19460 if (stack_adjust)
19461 live_regs_mask |= 1 << 3;
19463 if (TARGET_UNIFIED_ASM)
19464 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19465 else
19466 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19469 else
19470 if (TARGET_UNIFIED_ASM)
19471 sprintf (instr, "pop%s\t{", conditional);
19472 else
19473 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19475 p = instr + strlen (instr);
19477 for (reg = 0; reg <= SP_REGNUM; reg++)
19478 if (live_regs_mask & (1 << reg))
19480 int l = strlen (reg_names[reg]);
19482 if (first)
19483 first = 0;
19484 else
19486 memcpy (p, ", ", 2);
19487 p += 2;
19490 memcpy (p, "%|", 2);
19491 memcpy (p + 2, reg_names[reg], l);
19492 p += l + 2;
19495 if (live_regs_mask & (1 << LR_REGNUM))
19497 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19498 /* If returning from an interrupt, restore the CPSR. */
19499 if (IS_INTERRUPT (func_type))
19500 strcat (p, "^");
19502 else
19503 strcpy (p, "}");
19506 output_asm_insn (instr, & operand);
19508 /* See if we need to generate an extra instruction to
19509 perform the actual function return. */
19510 if (really_return
19511 && func_type != ARM_FT_INTERWORKED
19512 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19514 /* The return has already been handled
19515 by loading the LR into the PC. */
19516 return "";
19520 if (really_return)
19522 switch ((int) ARM_FUNC_TYPE (func_type))
19524 case ARM_FT_ISR:
19525 case ARM_FT_FIQ:
19526 /* ??? This is wrong for unified assembly syntax. */
19527 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19528 break;
19530 case ARM_FT_INTERWORKED:
19531 sprintf (instr, "bx%s\t%%|lr", conditional);
19532 break;
19534 case ARM_FT_EXCEPTION:
19535 /* ??? This is wrong for unified assembly syntax. */
19536 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19537 break;
19539 default:
19540 /* Use bx if it's available. */
19541 if (arm_arch5 || arm_arch4t)
19542 sprintf (instr, "bx%s\t%%|lr", conditional);
19543 else
19544 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19545 break;
19548 output_asm_insn (instr, & operand);
19551 return "";
19554 /* Write the function name into the code section, directly preceding
19555 the function prologue.
19557 Code will be output similar to this:
19559 .ascii "arm_poke_function_name", 0
19560 .align
19562 .word 0xff000000 + (t1 - t0)
19563 arm_poke_function_name
19564 mov ip, sp
19565 stmfd sp!, {fp, ip, lr, pc}
19566 sub fp, ip, #4
19568 When performing a stack backtrace, code can inspect the value
19569 of 'pc' stored at 'fp' + 0. If the trace function then looks
19570 at location pc - 12 and the top 8 bits are set, then we know
19571 that there is a function name embedded immediately preceding this
19572 location, and that its length is ((pc[-3]) & ~0xff000000).
19574 We assume that pc is declared as a pointer to an unsigned long.
19576 It is of no benefit to output the function name if we are assembling
19577 a leaf function. These function types will not contain a stack
19578 backtrace structure, so it is not possible to determine the
19579 function name. */
19580 void
19581 arm_poke_function_name (FILE *stream, const char *name)
19583 unsigned long alignlength;
19584 unsigned long length;
19585 rtx x;
19587 length = strlen (name) + 1;
19588 alignlength = ROUND_UP_WORD (length);
19590 ASM_OUTPUT_ASCII (stream, name, length);
19591 ASM_OUTPUT_ALIGN (stream, 2);
19592 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19593 assemble_aligned_integer (UNITS_PER_WORD, x);
19596 /* Place some comments into the assembler stream
19597 describing the current function. */
19598 static void
19599 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19601 unsigned long func_type;
19603 /* ??? Do we want to print some of the below anyway? */
19604 if (TARGET_THUMB1)
19605 return;
19607 /* Sanity check. */
19608 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19610 func_type = arm_current_func_type ();
19612 switch ((int) ARM_FUNC_TYPE (func_type))
19614 default:
19615 case ARM_FT_NORMAL:
19616 break;
19617 case ARM_FT_INTERWORKED:
19618 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19619 break;
19620 case ARM_FT_ISR:
19621 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19622 break;
19623 case ARM_FT_FIQ:
19624 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19625 break;
19626 case ARM_FT_EXCEPTION:
19627 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19628 break;
19631 if (IS_NAKED (func_type))
19632 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19634 if (IS_VOLATILE (func_type))
19635 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19637 if (IS_NESTED (func_type))
19638 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19639 if (IS_STACKALIGN (func_type))
19640 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19642 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19643 crtl->args.size,
19644 crtl->args.pretend_args_size, frame_size);
19646 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19647 frame_pointer_needed,
19648 cfun->machine->uses_anonymous_args);
19650 if (cfun->machine->lr_save_eliminated)
19651 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19653 if (crtl->calls_eh_return)
19654 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19658 static void
19659 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19660 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19662 arm_stack_offsets *offsets;
19664 if (TARGET_THUMB1)
19666 int regno;
19668 /* Emit any call-via-reg trampolines that are needed for v4t support
19669 of call_reg and call_value_reg type insns. */
19670 for (regno = 0; regno < LR_REGNUM; regno++)
19672 rtx label = cfun->machine->call_via[regno];
19674 if (label != NULL)
19676 switch_to_section (function_section (current_function_decl));
19677 targetm.asm_out.internal_label (asm_out_file, "L",
19678 CODE_LABEL_NUMBER (label));
19679 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19683 /* ??? Probably not safe to set this here, since it assumes that a
19684 function will be emitted as assembly immediately after we generate
19685 RTL for it. This does not happen for inline functions. */
19686 cfun->machine->return_used_this_function = 0;
19688 else /* TARGET_32BIT */
19690 /* We need to take into account any stack-frame rounding. */
19691 offsets = arm_get_frame_offsets ();
19693 gcc_assert (!use_return_insn (FALSE, NULL)
19694 || (cfun->machine->return_used_this_function != 0)
19695 || offsets->saved_regs == offsets->outgoing_args
19696 || frame_pointer_needed);
19700 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19701 STR and STRD. If an even number of registers is being pushed, an
19702 STRD pattern is created for each register pair. If an
19703 odd number of registers is pushed, emit an initial STR followed by
19704 as many STRD instructions as are needed. This works best when the
19705 stack is initially 64-bit aligned (the normal case), since it
19706 ensures that each STRD is also 64-bit aligned. */
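
/* An illustrative sketch, not part of the original sources: for a
   hypothetical SAVED_REGS_MASK covering {r4, r5, r6} (an odd count) the
   emitted sequence is roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   whereas an even mask such as {r4, r5, r6, r7} starts with a
   writeback STRD:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]  */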
19707 static void
19708 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19710 int num_regs = 0;
19711 int i;
19712 int regno;
19713 rtx par = NULL_RTX;
19714 rtx dwarf = NULL_RTX;
19715 rtx tmp;
19716 bool first = true;
19718 num_regs = bit_count (saved_regs_mask);
19720 /* Must be at least one register to save, and can't save SP or PC. */
19721 gcc_assert (num_regs > 0 && num_regs <= 14);
19722 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19723 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19725 /* Create sequence for DWARF info. All the frame-related data for
19726 debugging is held in this wrapper. */
19727 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19729 /* Describe the stack adjustment. */
19730 tmp = gen_rtx_SET (VOIDmode,
19731 stack_pointer_rtx,
19732 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19733 RTX_FRAME_RELATED_P (tmp) = 1;
19734 XVECEXP (dwarf, 0, 0) = tmp;
19736 /* Find the first register. */
19737 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19740 i = 0;
19742 /* If there's an odd number of registers to push, start off by
19743 pushing a single register. This ensures that subsequent strd
19744 operations are dword aligned (assuming that SP was originally
19745 64-bit aligned). */
19746 if ((num_regs & 1) != 0)
19748 rtx reg, mem, insn;
19750 reg = gen_rtx_REG (SImode, regno);
19751 if (num_regs == 1)
19752 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19753 stack_pointer_rtx));
19754 else
19755 mem = gen_frame_mem (Pmode,
19756 gen_rtx_PRE_MODIFY
19757 (Pmode, stack_pointer_rtx,
19758 plus_constant (Pmode, stack_pointer_rtx,
19759 -4 * num_regs)));
19761 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 insn = emit_insn (tmp);
19764 RTX_FRAME_RELATED_P (insn) = 1;
19765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19766 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19767 reg);
19768 RTX_FRAME_RELATED_P (tmp) = 1;
19769 i++;
19770 regno++;
19771 XVECEXP (dwarf, 0, i) = tmp;
19772 first = false;
19775 while (i < num_regs)
19776 if (saved_regs_mask & (1 << regno))
19778 rtx reg1, reg2, mem1, mem2;
19779 rtx tmp0, tmp1, tmp2;
19780 int regno2;
19782 /* Find the register to pair with this one. */
19783 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19784 regno2++)
19787 reg1 = gen_rtx_REG (SImode, regno);
19788 reg2 = gen_rtx_REG (SImode, regno2);
19790 if (first)
19792 rtx insn;
19794 first = false;
19795 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19796 stack_pointer_rtx,
19797 -4 * num_regs));
19798 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 -4 * (num_regs - 1)));
19801 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19802 plus_constant (Pmode, stack_pointer_rtx,
19803 -4 * (num_regs)));
19804 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19805 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19806 RTX_FRAME_RELATED_P (tmp0) = 1;
19807 RTX_FRAME_RELATED_P (tmp1) = 1;
19808 RTX_FRAME_RELATED_P (tmp2) = 1;
19809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19810 XVECEXP (par, 0, 0) = tmp0;
19811 XVECEXP (par, 0, 1) = tmp1;
19812 XVECEXP (par, 0, 2) = tmp2;
19813 insn = emit_insn (par);
19814 RTX_FRAME_RELATED_P (insn) = 1;
19815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19817 else
19819 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19820 stack_pointer_rtx,
19821 4 * i));
19822 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19823 stack_pointer_rtx,
19824 4 * (i + 1)));
19825 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19826 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19827 RTX_FRAME_RELATED_P (tmp1) = 1;
19828 RTX_FRAME_RELATED_P (tmp2) = 1;
19829 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19830 XVECEXP (par, 0, 0) = tmp1;
19831 XVECEXP (par, 0, 1) = tmp2;
19832 emit_insn (par);
19835 /* Create unwind information. This is an approximation. */
19836 tmp1 = gen_rtx_SET (VOIDmode,
19837 gen_frame_mem (Pmode,
19838 plus_constant (Pmode,
19839 stack_pointer_rtx,
19840 4 * i)),
19841 reg1);
19842 tmp2 = gen_rtx_SET (VOIDmode,
19843 gen_frame_mem (Pmode,
19844 plus_constant (Pmode,
19845 stack_pointer_rtx,
19846 4 * (i + 1))),
19847 reg2);
19849 RTX_FRAME_RELATED_P (tmp1) = 1;
19850 RTX_FRAME_RELATED_P (tmp2) = 1;
19851 XVECEXP (dwarf, 0, i + 1) = tmp1;
19852 XVECEXP (dwarf, 0, i + 2) = tmp2;
19853 i += 2;
19854 regno = regno2 + 1;
19856 else
19857 regno++;
19859 return;
19862 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19863 whenever possible, otherwise it emits single-word stores. The first store
19864 also allocates stack space for all saved registers, using writeback with
19865 pre-indexed addressing. All other stores use offset addressing. If no STRD
19866 can be emitted, this function emits a sequence of single-word stores,
19867 and not an STM as before, because single-word stores provide more
19868 scheduling freedom and can be turned into an STM by peephole optimizations. */
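
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r7} would be stored roughly as

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   only the adjacent even/odd pair (r4, r5) qualifies for STRD, and the first
   store also performs the whole 12-byte stack allocation.  */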
19869 static void
19870 arm_emit_strd_push (unsigned long saved_regs_mask)
19872 int num_regs = 0;
19873 int i, j, dwarf_index = 0;
19874 int offset = 0;
19875 rtx dwarf = NULL_RTX;
19876 rtx insn = NULL_RTX;
19877 rtx tmp, mem;
19879 /* TODO: More efficient code can be emitted by changing the
19880 layout, e.g., first push all pairs that can use STRD to keep the
19881 stack aligned, and then push all other registers. */
19882 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19883 if (saved_regs_mask & (1 << i))
19884 num_regs++;
19886 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19887 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19888 gcc_assert (num_regs > 0);
19890 /* Create sequence for DWARF info. */
19891 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19893 /* For dwarf info, we generate explicit stack update. */
19894 tmp = gen_rtx_SET (VOIDmode,
19895 stack_pointer_rtx,
19896 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19897 RTX_FRAME_RELATED_P (tmp) = 1;
19898 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19900 /* Save registers. */
19901 offset = - 4 * num_regs;
19902 j = 0;
19903 while (j <= LAST_ARM_REGNUM)
19904 if (saved_regs_mask & (1 << j))
19906 if ((j % 2 == 0)
19907 && (saved_regs_mask & (1 << (j + 1))))
19909 /* The current register and the next register form a register pair for
19910 which STRD can be generated. */
19911 if (offset < 0)
19913 /* Allocate stack space for all saved registers. */
19914 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19915 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19916 mem = gen_frame_mem (DImode, tmp);
19917 offset = 0;
19919 else if (offset > 0)
19920 mem = gen_frame_mem (DImode,
19921 plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 offset));
19924 else
19925 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19927 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 tmp = emit_insn (tmp);
19931 /* Record the first store insn. */
19932 if (dwarf_index == 1)
19933 insn = tmp;
19935 /* Generate dwarf info. */
19936 mem = gen_frame_mem (SImode,
19937 plus_constant (Pmode,
19938 stack_pointer_rtx,
19939 offset));
19940 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19941 RTX_FRAME_RELATED_P (tmp) = 1;
19942 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19944 mem = gen_frame_mem (SImode,
19945 plus_constant (Pmode,
19946 stack_pointer_rtx,
19947 offset + 4));
19948 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19949 RTX_FRAME_RELATED_P (tmp) = 1;
19950 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19952 offset += 8;
19953 j += 2;
19955 else
19957 /* Emit a single word store. */
19958 if (offset < 0)
19960 /* Allocate stack space for all saved registers. */
19961 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19962 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19963 mem = gen_frame_mem (SImode, tmp);
19964 offset = 0;
19966 else if (offset > 0)
19967 mem = gen_frame_mem (SImode,
19968 plus_constant (Pmode,
19969 stack_pointer_rtx,
19970 offset));
19971 else
19972 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19974 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19975 RTX_FRAME_RELATED_P (tmp) = 1;
19976 tmp = emit_insn (tmp);
19978 /* Record the first store insn. */
19979 if (dwarf_index == 1)
19980 insn = tmp;
19982 /* Generate dwarf info. */
19983 mem = gen_frame_mem (SImode,
19984 plus_constant(Pmode,
19985 stack_pointer_rtx,
19986 offset));
19987 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19988 RTX_FRAME_RELATED_P (tmp) = 1;
19989 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19991 offset += 4;
19992 j += 1;
19995 else
19996 j++;
19998 /* Attach dwarf info to the first insn we generate. */
19999 gcc_assert (insn != NULL_RTX);
20000 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20001 RTX_FRAME_RELATED_P (insn) = 1;
20004 /* Generate and emit an insn that we will recognize as a push_multi.
20005 Unfortunately, since this insn does not reflect very well the actual
20006 semantics of the operation, we need to annotate the insn for the benefit
20007 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20008 MASK for registers that should be annotated for DWARF2 frame unwind
20009 information. */
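
/* An illustrative sketch, not part of the original sources: with a
   hypothetical MASK of {r4, r5, lr} the insn ultimately assembles to a
   single

	push	{r4, r5, lr}

   (or "stmfd sp!, {r4, r5, lr}" in divided syntax), while the attached
   DWARF note describes one 12-byte stack decrement plus the three
   individual stores.  */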
20010 static rtx
20011 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20013 int num_regs = 0;
20014 int num_dwarf_regs = 0;
20015 int i, j;
20016 rtx par;
20017 rtx dwarf;
20018 int dwarf_par_index;
20019 rtx tmp, reg;
20021 /* We don't record the PC in the dwarf frame information. */
20022 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20024 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20026 if (mask & (1 << i))
20027 num_regs++;
20028 if (dwarf_regs_mask & (1 << i))
20029 num_dwarf_regs++;
20032 gcc_assert (num_regs && num_regs <= 16);
20033 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20035 /* For the body of the insn we are going to generate an UNSPEC in
20036 parallel with several USEs. This allows the insn to be recognized
20037 by the push_multi pattern in the arm.md file.
20039 The body of the insn looks something like this:
20041 (parallel [
20042 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20043 (const_int:SI <num>)))
20044 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20045 (use (reg:SI XX))
20046 (use (reg:SI YY))
20050 For the frame note however, we try to be more explicit and actually
20051 show each register being stored into the stack frame, plus a (single)
20052 decrement of the stack pointer. We do it this way in order to be
20053 friendly to the stack unwinding code, which only wants to see a single
20054 stack decrement per instruction. The RTL we generate for the note looks
20055 something like this:
20057 (sequence [
20058 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20059 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20060 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20061 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20065 FIXME: In an ideal world the PRE_MODIFY would not exist and
20066 instead we'd have a parallel expression detailing all
20067 the stores to the various memory addresses so that debug
20068 information is more up-to-date. Remember however while writing
20069 this to take care of the constraints with the push instruction.
20071 Note also that this has to be taken care of for the VFP registers.
20073 For more see PR43399. */
20075 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20076 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20077 dwarf_par_index = 1;
20079 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20081 if (mask & (1 << i))
20083 reg = gen_rtx_REG (SImode, i);
20085 XVECEXP (par, 0, 0)
20086 = gen_rtx_SET (VOIDmode,
20087 gen_frame_mem
20088 (BLKmode,
20089 gen_rtx_PRE_MODIFY (Pmode,
20090 stack_pointer_rtx,
20091 plus_constant
20092 (Pmode, stack_pointer_rtx,
20093 -4 * num_regs))
20095 gen_rtx_UNSPEC (BLKmode,
20096 gen_rtvec (1, reg),
20097 UNSPEC_PUSH_MULT));
20099 if (dwarf_regs_mask & (1 << i))
20101 tmp = gen_rtx_SET (VOIDmode,
20102 gen_frame_mem (SImode, stack_pointer_rtx),
20103 reg);
20104 RTX_FRAME_RELATED_P (tmp) = 1;
20105 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20108 break;
20112 for (j = 1, i++; j < num_regs; i++)
20114 if (mask & (1 << i))
20116 reg = gen_rtx_REG (SImode, i);
20118 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20120 if (dwarf_regs_mask & (1 << i))
20122 tmp
20123 = gen_rtx_SET (VOIDmode,
20124 gen_frame_mem
20125 (SImode,
20126 plus_constant (Pmode, stack_pointer_rtx,
20127 4 * j)),
20128 reg);
20129 RTX_FRAME_RELATED_P (tmp) = 1;
20130 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20133 j++;
20137 par = emit_insn (par);
20139 tmp = gen_rtx_SET (VOIDmode,
20140 stack_pointer_rtx,
20141 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20142 RTX_FRAME_RELATED_P (tmp) = 1;
20143 XVECEXP (dwarf, 0, 0) = tmp;
20145 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20147 return par;
20150 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20151 SIZE is the offset to be adjusted.
20152 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20153 static void
20154 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20156 rtx dwarf;
20158 RTX_FRAME_RELATED_P (insn) = 1;
20159 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20160 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20163 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20164 SAVED_REGS_MASK shows which registers need to be restored.
20166 Unfortunately, since this insn does not reflect very well the actual
20167 semantics of the operation, we need to annotate the insn for the benefit
20168 of DWARF2 frame unwind information. */
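
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, pc} assembles to roughly

	pop	{r4, r5, pc}

   which also performs the function return; a mask without PC additionally
   gets a REG_CFA_ADJUST_CFA note for the stack increment.  */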
20169 static void
20170 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20172 int num_regs = 0;
20173 int i, j;
20174 rtx par;
20175 rtx dwarf = NULL_RTX;
20176 rtx tmp, reg;
20177 bool return_in_pc;
20178 int offset_adj;
20179 int emit_update;
20181 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20182 offset_adj = return_in_pc ? 1 : 0;
20183 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (saved_regs_mask & (1 << i))
20185 num_regs++;
20187 gcc_assert (num_regs && num_regs <= 16);
20189 /* If SP is in reglist, then we don't emit SP update insn. */
20190 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20192 /* The parallel needs to hold num_regs SETs
20193 and one SET for the stack update. */
20194 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20196 if (return_in_pc)
20198 tmp = ret_rtx;
20199 XVECEXP (par, 0, 0) = tmp;
20202 if (emit_update)
20204 /* Increment the stack pointer, based on there being
20205 num_regs 4-byte registers to restore. */
20206 tmp = gen_rtx_SET (VOIDmode,
20207 stack_pointer_rtx,
20208 plus_constant (Pmode,
20209 stack_pointer_rtx,
20210 4 * num_regs));
20211 RTX_FRAME_RELATED_P (tmp) = 1;
20212 XVECEXP (par, 0, offset_adj) = tmp;
20215 /* Now restore every reg, which may include PC. */
20216 for (j = 0, i = 0; j < num_regs; i++)
20217 if (saved_regs_mask & (1 << i))
20219 reg = gen_rtx_REG (SImode, i);
20220 if ((num_regs == 1) && emit_update && !return_in_pc)
20222 /* Emit single load with writeback. */
20223 tmp = gen_frame_mem (SImode,
20224 gen_rtx_POST_INC (Pmode,
20225 stack_pointer_rtx));
20226 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20227 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20228 return;
20231 tmp = gen_rtx_SET (VOIDmode,
20232 reg,
20233 gen_frame_mem
20234 (SImode,
20235 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20236 RTX_FRAME_RELATED_P (tmp) = 1;
20237 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20239 /* We need to maintain a sequence for DWARF info too. As dwarf info
20240 should not have PC, skip PC. */
20241 if (i != PC_REGNUM)
20242 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20244 j++;
20247 if (return_in_pc)
20248 par = emit_jump_insn (par);
20249 else
20250 par = emit_insn (par);
20252 REG_NOTES (par) = dwarf;
20253 if (!return_in_pc)
20254 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20255 stack_pointer_rtx, stack_pointer_rtx);
20258 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20259 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20261 Unfortunately, since this insn does not reflect very well the actual
20262 semantics of the operation, we need to annotate the insn for the benefit
20263 of DWARF2 frame unwind information. */
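
/* An illustrative sketch, not part of the original sources: restoring four
   D-registers starting at d8 with BASE_REG == sp corresponds roughly to

	vldm	sp!, {d8-d11}

   i.e. a 32-byte pop; requests for more than 16 D-registers are split into
   two such instructions by the recursion below.  */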
20264 static void
20265 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20267 int i, j;
20268 rtx par;
20269 rtx dwarf = NULL_RTX;
20270 rtx tmp, reg;
20272 gcc_assert (num_regs && num_regs <= 32);
20274 /* Workaround ARM10 VFPr1 bug. */
20275 if (num_regs == 2 && !arm_arch6)
20277 if (first_reg == 15)
20278 first_reg--;
20280 num_regs++;
20283 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20284 there could be up to 32 D-registers to restore.
20285 If there are more than 16 D-registers, make two recursive calls,
20286 each of which emits one pop_multi instruction. */
20287 if (num_regs > 16)
20289 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20290 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20291 return;
20294 /* The parallel needs to hold num_regs SETs
20295 and one SET for the stack update. */
20296 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20298 /* Increment the stack pointer, based on there being
20299 num_regs 8-byte registers to restore. */
20300 tmp = gen_rtx_SET (VOIDmode,
20301 base_reg,
20302 plus_constant (Pmode, base_reg, 8 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, 0) = tmp;
20306 /* Now show every reg that will be restored, using a SET for each. */
20307 for (j = 0, i=first_reg; j < num_regs; i += 2)
20309 reg = gen_rtx_REG (DFmode, i);
20311 tmp = gen_rtx_SET (VOIDmode,
20312 reg,
20313 gen_frame_mem
20314 (DFmode,
20315 plus_constant (Pmode, base_reg, 8 * j)));
20316 RTX_FRAME_RELATED_P (tmp) = 1;
20317 XVECEXP (par, 0, j + 1) = tmp;
20319 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20321 j++;
20324 par = emit_insn (par);
20325 REG_NOTES (par) = dwarf;
20327 /* Make sure the CFA does not stay based on IP_REGNUM, so that unwinding from FP remains possible. */
20328 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20330 RTX_FRAME_RELATED_P (par) = 1;
20331 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20333 else
20334 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20335 base_reg, base_reg);
20338 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20339 even number of registers is being popped, multiple LDRD patterns are created for
20340 all register pairs. If an odd number of registers is popped, the last register is
20341 loaded using an LDR pattern. */
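
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r6} is restored roughly as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   the two LDRD targets need not be consecutive registers in Thumb-2.  */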
20342 static void
20343 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20345 int num_regs = 0;
20346 int i, j;
20347 rtx par = NULL_RTX;
20348 rtx dwarf = NULL_RTX;
20349 rtx tmp, reg, tmp1;
20350 bool return_in_pc;
20352 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20353 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20354 if (saved_regs_mask & (1 << i))
20355 num_regs++;
20357 gcc_assert (num_regs && num_regs <= 16);
20359 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20360 to be popped. So, if num_regs is even, now it will become odd,
20361 and we can generate pop with PC. If num_regs is odd, it will be
20362 even now, and ldr with return can be generated for PC. */
20363 if (return_in_pc)
20364 num_regs--;
20366 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20368 /* Var j iterates over all the registers to gather all the registers in
20369 saved_regs_mask. Var i gives index of saved registers in stack frame.
20370 A PARALLEL RTX of register-pair is created here, so that pattern for
20371 LDRD can be matched. As PC is always last register to be popped, and
20372 we have already decremented num_regs if PC, we don't have to worry
20373 about PC in this loop. */
20374 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20375 if (saved_regs_mask & (1 << j))
20377 /* Create RTX for memory load. */
20378 reg = gen_rtx_REG (SImode, j);
20379 tmp = gen_rtx_SET (SImode,
20380 reg,
20381 gen_frame_mem (SImode,
20382 plus_constant (Pmode,
20383 stack_pointer_rtx, 4 * i)));
20384 RTX_FRAME_RELATED_P (tmp) = 1;
20386 if (i % 2 == 0)
20388 /* When saved-register index (i) is even, the RTX to be emitted is
20389 yet to be created. Hence create it first. The LDRD pattern we
20390 are generating is :
20391 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20392 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20393 where target registers need not be consecutive. */
20394 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20395 dwarf = NULL_RTX;
20398 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20399 added as 0th element and if i is odd, reg_i is added as 1st element
20400 of LDRD pattern shown above. */
20401 XVECEXP (par, 0, (i % 2)) = tmp;
20402 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20404 if ((i % 2) == 1)
20406 /* When saved-register index (i) is odd, RTXs for both the registers
20407 to be loaded are generated in above given LDRD pattern, and the
20408 pattern can be emitted now. */
20409 par = emit_insn (par);
20410 REG_NOTES (par) = dwarf;
20411 RTX_FRAME_RELATED_P (par) = 1;
20414 i++;
20417 /* If the number of registers popped is odd and return_in_pc is false, or
20418 the number of registers is even and return_in_pc is true, the last register is
20419 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20420 then use LDR with post-increment. */
20422 /* Increment the stack pointer, based on there being
20423 num_regs 4-byte registers to restore. */
20424 tmp = gen_rtx_SET (VOIDmode,
20425 stack_pointer_rtx,
20426 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20427 RTX_FRAME_RELATED_P (tmp) = 1;
20428 tmp = emit_insn (tmp);
20429 if (!return_in_pc)
20431 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20432 stack_pointer_rtx, stack_pointer_rtx);
20435 dwarf = NULL_RTX;
20437 if (((num_regs % 2) == 1 && !return_in_pc)
20438 || ((num_regs % 2) == 0 && return_in_pc))
20440 /* Scan for the single register to be popped. Skip until the saved
20441 register is found. */
20442 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20444 /* Gen LDR with post increment here. */
20445 tmp1 = gen_rtx_MEM (SImode,
20446 gen_rtx_POST_INC (SImode,
20447 stack_pointer_rtx));
20448 set_mem_alias_set (tmp1, get_frame_alias_set ());
20450 reg = gen_rtx_REG (SImode, j);
20451 tmp = gen_rtx_SET (SImode, reg, tmp1);
20452 RTX_FRAME_RELATED_P (tmp) = 1;
20453 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20455 if (return_in_pc)
20457 /* If return_in_pc, j must be PC_REGNUM. */
20458 gcc_assert (j == PC_REGNUM);
20459 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20460 XVECEXP (par, 0, 0) = ret_rtx;
20461 XVECEXP (par, 0, 1) = tmp;
20462 par = emit_jump_insn (par);
20464 else
20466 par = emit_insn (tmp);
20467 REG_NOTES (par) = dwarf;
20468 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20469 stack_pointer_rtx, stack_pointer_rtx);
20473 else if ((num_regs % 2) == 1 && return_in_pc)
20475 /* There are 2 registers to be popped. So, generate the pattern
20476 pop_multiple_with_stack_update_and_return to pop in PC. */
20477 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20480 return;
20483 /* LDRD in ARM mode needs consecutive registers as operands. This function
20484 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20485 offset addressing and then generates one separate stack update. This provides
20486 more scheduling freedom, compared to writeback on every load. However,
20487 if the function returns using load into PC directly
20488 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20489 before the last load. TODO: Add a peephole optimization to recognize
20490 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20491 peephole optimization to merge the load at stack-offset zero
20492 with the stack update instruction using load with writeback
20493 in post-index addressing mode. */
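
/* An illustrative sketch, not part of the original sources: a hypothetical
   SAVED_REGS_MASK of {r4, r5, r6, pc} is restored roughly as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   with the final post-increment load also acting as the return.  */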
20494 static void
20495 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20497 int j = 0;
20498 int offset = 0;
20499 rtx par = NULL_RTX;
20500 rtx dwarf = NULL_RTX;
20501 rtx tmp, mem;
20503 /* Restore saved registers. */
20504 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20505 j = 0;
20506 while (j <= LAST_ARM_REGNUM)
20507 if (saved_regs_mask & (1 << j))
20509 if ((j % 2) == 0
20510 && (saved_regs_mask & (1 << (j + 1)))
20511 && (j + 1) != PC_REGNUM)
20513 /* Current register and next register form register pair for which
20514 LDRD can be generated. PC is always the last register popped, and
20515 we handle it separately. */
20516 if (offset > 0)
20517 mem = gen_frame_mem (DImode,
20518 plus_constant (Pmode,
20519 stack_pointer_rtx,
20520 offset));
20521 else
20522 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20524 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20525 tmp = emit_insn (tmp);
20526 RTX_FRAME_RELATED_P (tmp) = 1;
20528 /* Generate dwarf info. */
20530 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20531 gen_rtx_REG (SImode, j),
20532 NULL_RTX);
20533 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20534 gen_rtx_REG (SImode, j + 1),
20535 dwarf);
20537 REG_NOTES (tmp) = dwarf;
20539 offset += 8;
20540 j += 2;
20542 else if (j != PC_REGNUM)
20544 /* Emit a single word load. */
20545 if (offset > 0)
20546 mem = gen_frame_mem (SImode,
20547 plus_constant (Pmode,
20548 stack_pointer_rtx,
20549 offset));
20550 else
20551 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20553 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20554 tmp = emit_insn (tmp);
20555 RTX_FRAME_RELATED_P (tmp) = 1;
20557 /* Generate dwarf info. */
20558 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20559 gen_rtx_REG (SImode, j),
20560 NULL_RTX);
20562 offset += 4;
20563 j += 1;
20565 else /* j == PC_REGNUM */
20566 j++;
20568 else
20569 j++;
20571 /* Update the stack. */
20572 if (offset > 0)
20574 tmp = gen_rtx_SET (Pmode,
20575 stack_pointer_rtx,
20576 plus_constant (Pmode,
20577 stack_pointer_rtx,
20578 offset));
20579 tmp = emit_insn (tmp);
20580 arm_add_cfa_adjust_cfa_note (tmp, offset,
20581 stack_pointer_rtx, stack_pointer_rtx);
20582 offset = 0;
20585 if (saved_regs_mask & (1 << PC_REGNUM))
20587 /* Only PC is to be popped. */
20588 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20589 XVECEXP (par, 0, 0) = ret_rtx;
20590 tmp = gen_rtx_SET (SImode,
20591 gen_rtx_REG (SImode, PC_REGNUM),
20592 gen_frame_mem (SImode,
20593 gen_rtx_POST_INC (SImode,
20594 stack_pointer_rtx)));
20595 RTX_FRAME_RELATED_P (tmp) = 1;
20596 XVECEXP (par, 0, 1) = tmp;
20597 par = emit_jump_insn (par);
20599 /* Generate dwarf info. */
20600 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20601 gen_rtx_REG (SImode, PC_REGNUM),
20602 NULL_RTX);
20603 REG_NOTES (par) = dwarf;
20604 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20605 stack_pointer_rtx, stack_pointer_rtx);
20609 /* Calculate the size of the return value that is passed in registers. */
20610 static unsigned
20611 arm_size_return_regs (void)
20613 machine_mode mode;
20615 if (crtl->return_rtx != 0)
20616 mode = GET_MODE (crtl->return_rtx);
20617 else
20618 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20620 return GET_MODE_SIZE (mode);
20623 /* Return true if the current function needs to save/restore LR. */
20624 static bool
20625 thumb_force_lr_save (void)
20627 return !cfun->machine->lr_save_eliminated
20628 && (!leaf_function_p ()
20629 || thumb_far_jump_used_p ()
20630 || df_regs_ever_live_p (LR_REGNUM));
20633 /* We do not know whether r3 will be available, because
20634 an indirect tailcall is happening in this
20635 particular case. */
20636 static bool
20637 is_indirect_tailcall_p (rtx call)
20639 rtx pat = PATTERN (call);
20641 /* Indirect tail call. */
20642 pat = XVECEXP (pat, 0, 0);
20643 if (GET_CODE (pat) == SET)
20644 pat = SET_SRC (pat);
20646 pat = XEXP (XEXP (pat, 0), 0);
20647 return REG_P (pat);
20650 /* Return true if r3 is used by any of the tail call insns in the
20651 current function. */
20652 static bool
20653 any_sibcall_could_use_r3 (void)
20655 edge_iterator ei;
20656 edge e;
20658 if (!crtl->tail_call_emit)
20659 return false;
20660 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20661 if (e->flags & EDGE_SIBCALL)
20663 rtx call = BB_END (e->src);
20664 if (!CALL_P (call))
20665 call = prev_nonnote_nondebug_insn (call);
20666 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20667 if (find_regno_fusage (call, USE, 3)
20668 || is_indirect_tailcall_p (call))
20669 return true;
20671 return false;
20675 /* Compute the distance from register FROM to register TO.
20676 These can be the arg pointer (26), the soft frame pointer (25),
20677 the stack pointer (13) or the hard frame pointer (11).
20678 In thumb mode r7 is used as the soft frame pointer, if needed.
20679 Typical stack layout looks like this:
20681 old stack pointer -> | |
20682 ----
20683 | | \
20684 | | saved arguments for
20685 | | vararg functions
20686 | | /
20688 hard FP & arg pointer -> | | \
20689 | | stack
20690 | | frame
20691 | | /
20693 | | \
20694 | | call saved
20695 | | registers
20696 soft frame pointer -> | | /
20698 | | \
20699 | | local
20700 | | variables
20701 locals base pointer -> | | /
20703 | | \
20704 | | outgoing
20705 | | arguments
20706 current stack pointer -> | | /
20709 For a given function some or all of these stack components
20710 may not be needed, giving rise to the possibility of
20711 eliminating some of the registers.
20713 The values returned by this function must reflect the behavior
20714 of arm_expand_prologue() and arm_compute_save_reg_mask().
20716 The sign of the number returned reflects the direction of stack
20717 growth, so the values are positive for all eliminations except
20718 from the soft frame pointer to the hard frame pointer.
20720 SFP may point just inside the local variables block to ensure correct
20721 alignment. */
20724 /* Calculate stack offsets. These are used to calculate register elimination
20725 offsets and in prologue/epilogue code. Also calculates which registers
20726 should be saved. */
20728 static arm_stack_offsets *
20729 arm_get_frame_offsets (void)
20731 struct arm_stack_offsets *offsets;
20732 unsigned long func_type;
20733 int leaf;
20734 int saved;
20735 int core_saved;
20736 HOST_WIDE_INT frame_size;
20737 int i;
20739 offsets = &cfun->machine->stack_offsets;
20741 /* We need to know if we are a leaf function. Unfortunately, it
20742 is possible to be called after start_sequence has been called,
20743 which causes get_insns to return the insns for the sequence,
20744 not the function, which will cause leaf_function_p to return
20745 the incorrect result.
20747 However, we only need to know about leaf functions once reload has completed, and the
20748 frame size cannot be changed after that time, so we can safely
20749 use the cached value. */
20751 if (reload_completed)
20752 return offsets;
20754 /* Initially this is the size of the local variables. It will be translated
20755 into an offset once we have determined the size of preceding data. */
20756 frame_size = ROUND_UP_WORD (get_frame_size ());
20758 leaf = leaf_function_p ();
20760 /* Space for variadic functions. */
20761 offsets->saved_args = crtl->args.pretend_args_size;
20763 /* In Thumb mode this is incorrect, but never used. */
20764 offsets->frame
20765 = (offsets->saved_args
20766 + arm_compute_static_chain_stack_bytes ()
20767 + (frame_pointer_needed ? 4 : 0));
20769 if (TARGET_32BIT)
20771 unsigned int regno;
20773 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20774 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20775 saved = core_saved;
20777 /* We know that SP will be doubleword aligned on entry, and we must
20778 preserve that condition at any subroutine call. We also require the
20779 soft frame pointer to be doubleword aligned. */
20781 if (TARGET_REALLY_IWMMXT)
20783 /* Check for the call-saved iWMMXt registers. */
20784 for (regno = FIRST_IWMMXT_REGNUM;
20785 regno <= LAST_IWMMXT_REGNUM;
20786 regno++)
20787 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20788 saved += 8;
20791 func_type = arm_current_func_type ();
20792 /* Space for saved VFP registers. */
20793 if (! IS_VOLATILE (func_type)
20794 && TARGET_HARD_FLOAT && TARGET_VFP)
20795 saved += arm_get_vfp_saved_size ();
20797 else /* TARGET_THUMB1 */
20799 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20800 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20801 saved = core_saved;
20802 if (TARGET_BACKTRACE)
20803 saved += 16;
20806 /* Saved registers include the stack frame. */
20807 offsets->saved_regs
20808 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20809 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20811 /* A leaf function does not need any stack alignment if it has nothing
20812 on the stack. */
20813 if (leaf && frame_size == 0
20814 /* However if it calls alloca(), we have a dynamically allocated
20815 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20816 && ! cfun->calls_alloca)
20818 offsets->outgoing_args = offsets->soft_frame;
20819 offsets->locals_base = offsets->soft_frame;
20820 return offsets;
20823 /* Ensure SFP has the correct alignment. */
20824 if (ARM_DOUBLEWORD_ALIGN
20825 && (offsets->soft_frame & 7))
20827 offsets->soft_frame += 4;
20828 /* Try to align stack by pushing an extra reg. Don't bother doing this
20829 when there is a stack frame as the alignment will be rolled into
20830 the normal stack adjustment. */
20831 if (frame_size + crtl->outgoing_args_size == 0)
20833 int reg = -1;
20835 /* Register r3 is caller-saved. Normally it does not need to be
20836 saved on entry by the prologue. However if we choose to save
20837 it for padding then we may confuse the compiler into thinking
20838 a prologue sequence is required when in fact it is not. This
20839 will occur when shrink-wrapping if r3 is used as a scratch
20840 register and there are no other callee-saved writes.
20842 This situation can be avoided when other callee-saved registers
20843 are available and r3 is not mandatory if we choose a callee-saved
20844 register for padding. */
20845 bool prefer_callee_reg_p = false;
20847 /* If it is safe to use r3, then do so. This sometimes
20848 generates better code on Thumb-2 by avoiding the need to
20849 use 32-bit push/pop instructions. */
20850 if (! any_sibcall_could_use_r3 ()
20851 && arm_size_return_regs () <= 12
20852 && (offsets->saved_regs_mask & (1 << 3)) == 0
20853 && (TARGET_THUMB2
20854 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20856 reg = 3;
20857 if (!TARGET_THUMB2)
20858 prefer_callee_reg_p = true;
20860 if (reg == -1
20861 || prefer_callee_reg_p)
20863 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20865 /* Avoid fixed registers; they may be changed at
20866 arbitrary times so it's unsafe to restore them
20867 during the epilogue. */
20868 if (!fixed_regs[i]
20869 && (offsets->saved_regs_mask & (1 << i)) == 0)
20871 reg = i;
20872 break;
20877 if (reg != -1)
20879 offsets->saved_regs += 4;
20880 offsets->saved_regs_mask |= (1 << reg);
20885 offsets->locals_base = offsets->soft_frame + frame_size;
20886 offsets->outgoing_args = (offsets->locals_base
20887 + crtl->outgoing_args_size);
20889 if (ARM_DOUBLEWORD_ALIGN)
20891 /* Ensure SP remains doubleword aligned. */
20892 if (offsets->outgoing_args & 7)
20893 offsets->outgoing_args += 4;
20894 gcc_assert (!(offsets->outgoing_args & 7));
20897 return offsets;
20901 /* Calculate the relative offsets for the different stack pointers. Positive
20902 offsets are in the direction of stack growth. */
20904 HOST_WIDE_INT
20905 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20907 arm_stack_offsets *offsets;
20909 offsets = arm_get_frame_offsets ();
20911 /* OK, now we have enough information to compute the distances.
20912 There must be an entry in these switch tables for each pair
20913 of registers in ELIMINABLE_REGS, even if some of the entries
20914 seem to be redundant or useless. */
20915 switch (from)
20917 case ARG_POINTER_REGNUM:
20918 switch (to)
20920 case THUMB_HARD_FRAME_POINTER_REGNUM:
20921 return 0;
20923 case FRAME_POINTER_REGNUM:
20924 /* This is the reverse of the soft frame pointer
20925 to hard frame pointer elimination below. */
20926 return offsets->soft_frame - offsets->saved_args;
20928 case ARM_HARD_FRAME_POINTER_REGNUM:
20929 /* This is only non-zero in the case where the static chain register
20930 is stored above the frame. */
20931 return offsets->frame - offsets->saved_args - 4;
20933 case STACK_POINTER_REGNUM:
20934 /* If nothing has been pushed on the stack at all
20935 then this will return -4. This *is* correct! */
20936 return offsets->outgoing_args - (offsets->saved_args + 4);
20938 default:
20939 gcc_unreachable ();
20941 gcc_unreachable ();
20943 case FRAME_POINTER_REGNUM:
20944 switch (to)
20946 case THUMB_HARD_FRAME_POINTER_REGNUM:
20947 return 0;
20949 case ARM_HARD_FRAME_POINTER_REGNUM:
20950 /* The hard frame pointer points to the top entry in the
20951 stack frame. The soft frame pointer to the bottom entry
20952 in the stack frame. If there is no stack frame at all,
20953 then they are identical. */
20955 return offsets->frame - offsets->soft_frame;
20957 case STACK_POINTER_REGNUM:
20958 return offsets->outgoing_args - offsets->soft_frame;
20960 default:
20961 gcc_unreachable ();
20963 gcc_unreachable ();
20965 default:
20966 /* You cannot eliminate from the stack pointer.
20967 In theory you could eliminate from the hard frame
20968 pointer to the stack pointer, but this will never
20969 happen, since if a stack frame is not needed the
20970 hard frame pointer will never be used. */
20971 gcc_unreachable ();
20975 /* Given FROM and TO register numbers, say whether this elimination is
20976 allowed. Frame pointer elimination is automatically handled.
20978 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20979 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20980 pointer, we must eliminate FRAME_POINTER_REGNUM into
20981 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20982 ARG_POINTER_REGNUM. */
20984 bool
20985 arm_can_eliminate (const int from, const int to)
20987 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20988 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20989 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20990 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20991 true);
20994 /* Emit RTL to save coprocessor registers on function entry. Returns the
20995 number of bytes pushed. */
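
/* An illustrative sketch, not part of the original sources: a function that
   keeps d8 and d9 live across calls would get roughly

	vpush	{d8, d9}

   from the VFP loop below, contributing 16 bytes to the returned size.  */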
20997 static int
20998 arm_save_coproc_regs(void)
21000 int saved_size = 0;
21001 unsigned reg;
21002 unsigned start_reg;
21003 rtx insn;
21005 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21006 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21008 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21009 insn = gen_rtx_MEM (V2SImode, insn);
21010 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21011 RTX_FRAME_RELATED_P (insn) = 1;
21012 saved_size += 8;
21015 if (TARGET_HARD_FLOAT && TARGET_VFP)
21017 start_reg = FIRST_VFP_REGNUM;
21019 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21021 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21022 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21024 if (start_reg != reg)
21025 saved_size += vfp_emit_fstmd (start_reg,
21026 (reg - start_reg) / 2);
21027 start_reg = reg + 2;
21030 if (start_reg != reg)
21031 saved_size += vfp_emit_fstmd (start_reg,
21032 (reg - start_reg) / 2);
21034 return saved_size;
21038 /* Set the Thumb frame pointer from the stack pointer. */
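
/* An illustrative sketch, not part of the original sources: for small
   offsets this is simply "add r7, sp, #amount" (r7 being the Thumb hard
   frame pointer); for amounts of 1024 or more the constant is first moved
   into the frame pointer and SP is then added to it, with an explicit
   REG_FRAME_RELATED_EXPR note describing the combined effect.  */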
21040 static void
21041 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21043 HOST_WIDE_INT amount;
21044 rtx insn, dwarf;
21046 amount = offsets->outgoing_args - offsets->locals_base;
21047 if (amount < 1024)
21048 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21049 stack_pointer_rtx, GEN_INT (amount)));
21050 else
21052 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21053 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21054 expects the first two operands to be the same. */
21055 if (TARGET_THUMB2)
21057 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21058 stack_pointer_rtx,
21059 hard_frame_pointer_rtx));
21061 else
21063 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21064 hard_frame_pointer_rtx,
21065 stack_pointer_rtx));
21067 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21068 plus_constant (Pmode, stack_pointer_rtx, amount));
21069 RTX_FRAME_RELATED_P (dwarf) = 1;
21070 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21073 RTX_FRAME_RELATED_P (insn) = 1;
21076 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21077 function. */
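
/* An illustrative sketch, not part of the original sources: for a typical
   APCS-frame ARM function the sequence produced here follows the familiar
   pattern already shown above for arm_poke_function_name,

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals and outgoing arguments>

   although the many special cases below (interrupts, nested functions,
   stack realignment, STRD-based pushes) vary this considerably.  */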
21078 void
21079 arm_expand_prologue (void)
21081 rtx amount;
21082 rtx insn;
21083 rtx ip_rtx;
21084 unsigned long live_regs_mask;
21085 unsigned long func_type;
21086 int fp_offset = 0;
21087 int saved_pretend_args = 0;
21088 int saved_regs = 0;
21089 unsigned HOST_WIDE_INT args_to_push;
21090 arm_stack_offsets *offsets;
21092 func_type = arm_current_func_type ();
21094 /* Naked functions don't have prologues. */
21095 if (IS_NAKED (func_type))
21096 return;
21098 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21099 args_to_push = crtl->args.pretend_args_size;
21101 /* Compute which registers we will have to save onto the stack. */
21102 offsets = arm_get_frame_offsets ();
21103 live_regs_mask = offsets->saved_regs_mask;
21105 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21107 if (IS_STACKALIGN (func_type))
21109 rtx r0, r1;
21111 /* Handle a word-aligned stack pointer. We generate the following:
21113 mov r0, sp
21114 bic r1, r0, #7
21115 mov sp, r1
21116 <save and restore r0 in normal prologue/epilogue>
21117 mov sp, r0
21118 bx lr
21120 The unwinder doesn't need to know about the stack realignment.
21121 Just tell it we saved SP in r0. */
21122 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21124 r0 = gen_rtx_REG (SImode, 0);
21125 r1 = gen_rtx_REG (SImode, 1);
21127 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21128 RTX_FRAME_RELATED_P (insn) = 1;
21129 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21131 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21133 /* ??? The CFA changes here, which may cause GDB to conclude that it
21134 has entered a different function. That said, the unwind info is
21135 correct, individually, before and after this instruction because
21136 we've described the save of SP, which will override the default
21137 handling of SP as restoring from the CFA. */
21138 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21141 /* For APCS frames, if IP register is clobbered
21142 when creating frame, save that register in a special
21143 way. */
21144 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21146 if (IS_INTERRUPT (func_type))
21148 /* Interrupt functions must not corrupt any registers.
21149 Creating a frame pointer however, corrupts the IP
21150 register, so we must push it first. */
21151 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21153 /* Do not set RTX_FRAME_RELATED_P on this insn.
21154 The dwarf stack unwinding code only wants to see one
21155 stack decrement per function, and this is not it. If
21156 this instruction is labeled as being part of the frame
21157 creation sequence then dwarf2out_frame_debug_expr will
21158 die when it encounters the assignment of IP to FP
21159 later on, since the use of SP here establishes SP as
21160 the CFA register and not IP.
21162 Anyway this instruction is not really part of the stack
21163 frame creation although it is part of the prologue. */
21165 else if (IS_NESTED (func_type))
21167 /* The static chain register is the same as the IP register
21168 used as a scratch register during stack frame creation.
21169 To get around this need to find somewhere to store IP
21170 whilst the frame is being created. We try the following
21171 places in order:
21173 1. The last argument register r3 if it is available.
21174 2. A slot on the stack above the frame if there are no
21175 arguments to push onto the stack.
21176 3. Register r3 again, after pushing the argument registers
21177 onto the stack, if this is a varargs function.
21178 4. The last slot on the stack created for the arguments to
21179 push, if this isn't a varargs function.
21181 Note - we only need to tell the dwarf2 backend about the SP
21182 adjustment in the second variant; the static chain register
21183 doesn't need to be unwound, as it doesn't contain a value
21184 inherited from the caller. */
21186 if (!arm_r3_live_at_start_p ())
21187 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21188 else if (args_to_push == 0)
21190 rtx addr, dwarf;
21192 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21193 saved_regs += 4;
21195 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21196 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21197 fp_offset = 4;
21199 /* Just tell the dwarf backend that we adjusted SP. */
21200 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21201 plus_constant (Pmode, stack_pointer_rtx,
21202 -fp_offset));
21203 RTX_FRAME_RELATED_P (insn) = 1;
21204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21206 else
21208 /* Store the args on the stack. */
21209 if (cfun->machine->uses_anonymous_args)
21211 insn
21212 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21213 (0xf0 >> (args_to_push / 4)) & 0xf);
21214 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21215 saved_pretend_args = 1;
21217 else
21219 rtx addr, dwarf;
21221 if (args_to_push == 4)
21222 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21223 else
21224 addr
21225 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21226 plus_constant (Pmode,
21227 stack_pointer_rtx,
21228 -args_to_push));
21230 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21232 /* Just tell the dwarf backend that we adjusted SP. */
21233 dwarf
21234 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21235 plus_constant (Pmode, stack_pointer_rtx,
21236 -args_to_push));
21237 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21240 RTX_FRAME_RELATED_P (insn) = 1;
21241 fp_offset = args_to_push;
21242 args_to_push = 0;
21246 insn = emit_set_insn (ip_rtx,
21247 plus_constant (Pmode, stack_pointer_rtx,
21248 fp_offset));
21249 RTX_FRAME_RELATED_P (insn) = 1;
21252 if (args_to_push)
21254 /* Push the argument registers, or reserve space for them. */
21255 if (cfun->machine->uses_anonymous_args)
21256 insn = emit_multi_reg_push
21257 ((0xf0 >> (args_to_push / 4)) & 0xf,
21258 (0xf0 >> (args_to_push / 4)) & 0xf);
21259 else
21260 insn = emit_insn
21261 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21262 GEN_INT (- args_to_push)));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21266 /* If this is an interrupt service routine, and the link register
21267 is going to be pushed, and we're not generating the extra
21268 push of IP (needed when a frame is needed and the frame layout is APCS),
21269 then subtracting four from LR now will mean that the function return
21270 can be done with a single instruction. */
21271 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21272 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21273 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21274 && TARGET_ARM)
21276 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21278 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21281 if (live_regs_mask)
21283 unsigned long dwarf_regs_mask = live_regs_mask;
21285 saved_regs += bit_count (live_regs_mask) * 4;
21286 if (optimize_size && !frame_pointer_needed
21287 && saved_regs == offsets->saved_regs - offsets->saved_args)
21289 /* If no coprocessor registers are being pushed and we don't have
21290 to worry about a frame pointer then push extra registers to
21291 create the stack frame. This is done in a way that does not
21292 alter the frame layout, so it is independent of the epilogue. */
21293 int n;
21294 int frame;
21295 n = 0;
21296 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21297 n++;
21298 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21299 if (frame && n * 4 >= frame)
21301 n = frame / 4;
21302 live_regs_mask |= (1 << n) - 1;
21303 saved_regs += frame;
21307 if (TARGET_LDRD
21308 && current_tune->prefer_ldrd_strd
21309 && !optimize_function_for_size_p (cfun))
21311 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21312 if (TARGET_THUMB2)
21313 thumb2_emit_strd_push (live_regs_mask);
21314 else if (TARGET_ARM
21315 && !TARGET_APCS_FRAME
21316 && !IS_INTERRUPT (func_type))
21317 arm_emit_strd_push (live_regs_mask);
21318 else
21320 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21321 RTX_FRAME_RELATED_P (insn) = 1;
21324 else
21326 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21327 RTX_FRAME_RELATED_P (insn) = 1;
21331 if (! IS_VOLATILE (func_type))
21332 saved_regs += arm_save_coproc_regs ();
21334 if (frame_pointer_needed && TARGET_ARM)
21336 /* Create the new frame pointer. */
21337 if (TARGET_APCS_FRAME)
21339 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21340 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21341 RTX_FRAME_RELATED_P (insn) = 1;
21343 if (IS_NESTED (func_type))
21345 /* Recover the static chain register. */
21346 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21347 insn = gen_rtx_REG (SImode, 3);
21348 else
21350 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21351 insn = gen_frame_mem (SImode, insn);
21353 emit_set_insn (ip_rtx, insn);
21354 /* Add a USE to stop propagate_one_insn() from barfing. */
21355 emit_insn (gen_force_register_use (ip_rtx));
21358 else
21360 insn = GEN_INT (saved_regs - 4);
21361 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21362 stack_pointer_rtx, insn));
21363 RTX_FRAME_RELATED_P (insn) = 1;
21367 if (flag_stack_usage_info)
21368 current_function_static_stack_size
21369 = offsets->outgoing_args - offsets->saved_args;
21371 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21373 /* This add can produce multiple insns for a large constant, so we
21374 need to get tricky. */
21375 rtx_insn *last = get_last_insn ();
21377 amount = GEN_INT (offsets->saved_args + saved_regs
21378 - offsets->outgoing_args);
21380 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21381 amount));
21384 last = last ? NEXT_INSN (last) : get_insns ();
21385 RTX_FRAME_RELATED_P (last) = 1;
21387 while (last != insn);
21389 /* If the frame pointer is needed, emit a special barrier that
21390 will prevent the scheduler from moving stores to the frame
21391 before the stack adjustment. */
21392 if (frame_pointer_needed)
21393 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21394 hard_frame_pointer_rtx));
21398 if (frame_pointer_needed && TARGET_THUMB2)
21399 thumb_set_frame_pointer (offsets);
21401 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21403 unsigned long mask;
21405 mask = live_regs_mask;
21406 mask &= THUMB2_WORK_REGS;
21407 if (!IS_NESTED (func_type))
21408 mask |= (1 << IP_REGNUM);
21409 arm_load_pic_register (mask);
21412 /* If we are profiling, make sure no instructions are scheduled before
21413 the call to mcount. Similarly if the user has requested no
21414 scheduling in the prolog. Similarly if we want non-call exceptions
21415 using the EABI unwinder, to prevent faulting instructions from being
21416 swapped with a stack adjustment. */
21417 if (crtl->profile || !TARGET_SCHED_PROLOG
21418 || (arm_except_unwind_info (&global_options) == UI_TARGET
21419 && cfun->can_throw_non_call_exceptions))
21420 emit_insn (gen_blockage ());
21422 /* If the link register is being kept alive, with the return address in it,
21423 then make sure that it does not get reused by the ce2 pass. */
21424 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21425 cfun->machine->lr_save_eliminated = 1;
21428 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21429 static void
21430 arm_print_condition (FILE *stream)
21432 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21434 /* Branch conversion is not implemented for Thumb-2. */
21435 if (TARGET_THUMB)
21437 output_operand_lossage ("predicated Thumb instruction");
21438 return;
21440 if (current_insn_predicate != NULL)
21442 output_operand_lossage
21443 ("predicated instruction in conditional sequence");
21444 return;
21447 fputs (arm_condition_codes[arm_current_cc], stream);
21449 else if (current_insn_predicate)
21451 enum arm_cond_code code;
21453 if (TARGET_THUMB1)
21455 output_operand_lossage ("predicated Thumb instruction");
21456 return;
21459 code = get_arm_condition_code (current_insn_predicate);
21460 fputs (arm_condition_codes[code], stream);
21465 /* Globally reserved letters: acln
21466 Punctuation letters currently used: @_|?().!#
21467 Lower case letters currently used: bcdefhimpqtvwxyz
21468 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21469 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21471 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21473 If CODE is 'd', then X is a condition operand and the instruction
21474 should only be executed if the condition is true.
21475 If CODE is 'D', then X is a condition operand and the instruction
21476 should only be executed if the condition is false: however, if the mode
21477 of the comparison is CCFPEmode, then always execute the instruction -- we
21478 do this because in these circumstances !GE does not necessarily imply LT;
21479 in these cases the instruction pattern will take care to make sure that
21480 an instruction containing %d will follow, thereby undoing the effects of
21481 doing this instruction unconditionally.
21482 If CODE is 'N' then X is a floating point operand that must be negated
21483 before output.
21484 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21485 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
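/* Illustrative examples (added for exposition; the real templates live in
   the machine description and the exact strings may differ): a
   conditional-move pattern might use an output template such as
       "mov%d3\t%0, %1\;mov%D3\t%0, %2"
   where %d3 and %D3 print the condition (and its inverse) taken from
   comparison operand 3, while a 64-bit add might use
       "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"
   to address the low and high words of DImode register operands.  */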
21486 static void
21487 arm_print_operand (FILE *stream, rtx x, int code)
21489 switch (code)
21491 case '@':
21492 fputs (ASM_COMMENT_START, stream);
21493 return;
21495 case '_':
21496 fputs (user_label_prefix, stream);
21497 return;
21499 case '|':
21500 fputs (REGISTER_PREFIX, stream);
21501 return;
21503 case '?':
21504 arm_print_condition (stream);
21505 return;
21507 case '(':
21508 /* Nothing in unified syntax, otherwise the current condition code. */
21509 if (!TARGET_UNIFIED_ASM)
21510 arm_print_condition (stream);
21511 break;
21513 case ')':
21514 /* The current condition code in unified syntax, otherwise nothing. */
21515 if (TARGET_UNIFIED_ASM)
21516 arm_print_condition (stream);
21517 break;
21519 case '.':
21520 /* The current condition code for a condition code setting instruction.
21521 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21522 if (TARGET_UNIFIED_ASM)
21524 fputc('s', stream);
21525 arm_print_condition (stream);
21527 else
21529 arm_print_condition (stream);
21530 fputc('s', stream);
21532 return;
21534 case '!':
21535 /* If the instruction is conditionally executed then print
21536 the current condition code, otherwise print 's'. */
21537 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21538 if (current_insn_predicate)
21539 arm_print_condition (stream);
21540 else
21541 fputc('s', stream);
21542 break;
21544 /* %# is a "break" sequence. It doesn't output anything, but is used to
21545 separate e.g. operand numbers from following text, if that text consists
21546 of further digits which we don't want to be part of the operand
21547 number. */
21548 case '#':
21549 return;
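/* For exposition: the break matters when an operand number is directly
   followed by a literal digit.  "%11" in a template would be read as
   operand eleven, whereas a (hypothetical) "%1%#1" prints operand 1
   followed by the literal character '1'.  */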
21551 case 'N':
21553 REAL_VALUE_TYPE r;
21554 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21555 r = real_value_negate (&r);
21556 fprintf (stream, "%s", fp_const_from_val (&r));
21558 return;
21560 /* An integer or symbol address without a preceding # sign. */
21561 case 'c':
21562 switch (GET_CODE (x))
21564 case CONST_INT:
21565 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21566 break;
21568 case SYMBOL_REF:
21569 output_addr_const (stream, x);
21570 break;
21572 case CONST:
21573 if (GET_CODE (XEXP (x, 0)) == PLUS
21574 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21576 output_addr_const (stream, x);
21577 break;
21579 /* Fall through. */
21581 default:
21582 output_operand_lossage ("Unsupported operand for code '%c'", code);
21584 return;
21586 /* An integer that we want to print in HEX. */
21587 case 'x':
21588 switch (GET_CODE (x))
21590 case CONST_INT:
21591 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21592 break;
21594 default:
21595 output_operand_lossage ("Unsupported operand for code '%c'", code);
21597 return;
21599 case 'B':
21600 if (CONST_INT_P (x))
21602 HOST_WIDE_INT val;
21603 val = ARM_SIGN_EXTEND (~INTVAL (x));
21604 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21606 else
21608 putc ('~', stream);
21609 output_addr_const (stream, x);
21611 return;
21613 case 'b':
21614 /* Print the log2 of a CONST_INT. */
21616 HOST_WIDE_INT val;
21618 if (!CONST_INT_P (x)
21619 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21620 output_operand_lossage ("Unsupported operand for code '%c'", code);
21621 else
21622 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21624 return;
21626 case 'L':
21627 /* The low 16 bits of an immediate constant. */
21628 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21629 return;
21631 case 'i':
21632 fprintf (stream, "%s", arithmetic_instr (x, 1));
21633 return;
21635 case 'I':
21636 fprintf (stream, "%s", arithmetic_instr (x, 0));
21637 return;
21639 case 'S':
21641 HOST_WIDE_INT val;
21642 const char *shift;
21644 shift = shift_op (x, &val);
21646 if (shift)
21648 fprintf (stream, ", %s ", shift);
21649 if (val == -1)
21650 arm_print_operand (stream, XEXP (x, 1), 0);
21651 else
21652 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21655 return;
21657 /* An explanation of the 'Q', 'R' and 'H' register operands:
21659 In a pair of registers containing a DI or DF value the 'Q'
21660 operand returns the register number of the register containing
21661 the least significant part of the value. The 'R' operand returns
21662 the register number of the register containing the most
21663 significant part of the value.
21665 The 'H' operand returns the higher of the two register numbers.
21666 On a system where WORDS_BIG_ENDIAN is true the 'H' operand is the
21667 same as the 'Q' operand, since the most significant part of the
21668 value is held in the lower number register. The reverse is true
21669 on systems where WORDS_BIG_ENDIAN is false.
21671 The purpose of these operands is to distinguish between cases
21672 where the endian-ness of the values is important (for example
21673 when they are added together), and cases where the endian-ness
21674 is irrelevant, but the order of register operations is important.
21675 For example when loading a value from memory into a register
21676 pair, the endian-ness does not matter. Provided that the value
21677 from the lower memory address is put into the lower numbered
21678 register, and the value from the higher address is put into the
21679 higher numbered register, the load will work regardless of whether
21680 the value being loaded is big-wordian or little-wordian. The
21681 order of the two register loads can matter however, if the address
21682 of the memory location is actually held in one of the registers
21683 being overwritten by the load.
21685 The 'Q' and 'R' constraints are also available for 64-bit
21686 constants. */
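/* Worked example (added for clarity): a DImode value held in {r0, r1}.
   When WORDS_BIG_ENDIAN is false (the usual little-endian word order),
   %Q prints r0 (least significant word), %R prints r1 (most significant
   word) and %H prints r1 (the higher register number).  When
   WORDS_BIG_ENDIAN is true, %Q prints r1 and %R prints r0, while %H
   still prints r1.  */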
21687 case 'Q':
21688 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21690 rtx part = gen_lowpart (SImode, x);
21691 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21692 return;
21695 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21697 output_operand_lossage ("invalid operand for code '%c'", code);
21698 return;
21701 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21702 return;
21704 case 'R':
21705 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21707 machine_mode mode = GET_MODE (x);
21708 rtx part;
21710 if (mode == VOIDmode)
21711 mode = DImode;
21712 part = gen_highpart_mode (SImode, mode, x);
21713 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21714 return;
21717 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21719 output_operand_lossage ("invalid operand for code '%c'", code);
21720 return;
21723 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21724 return;
21726 case 'H':
21727 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21729 output_operand_lossage ("invalid operand for code '%c'", code);
21730 return;
21733 asm_fprintf (stream, "%r", REGNO (x) + 1);
21734 return;
21736 case 'J':
21737 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21739 output_operand_lossage ("invalid operand for code '%c'", code);
21740 return;
21743 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21744 return;
21746 case 'K':
21747 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21749 output_operand_lossage ("invalid operand for code '%c'", code);
21750 return;
21753 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21754 return;
21756 case 'm':
21757 asm_fprintf (stream, "%r",
21758 REG_P (XEXP (x, 0))
21759 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21760 return;
21762 case 'M':
21763 asm_fprintf (stream, "{%r-%r}",
21764 REGNO (x),
21765 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21766 return;
21768 /* Like 'M', but writing doubleword vector registers, for use by Neon
21769 insns. */
21770 case 'h':
21772 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21773 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21774 if (numregs == 1)
21775 asm_fprintf (stream, "{d%d}", regno);
21776 else
21777 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21779 return;
21781 case 'd':
21782 /* CONST_TRUE_RTX means always -- that's the default. */
21783 if (x == const_true_rtx)
21784 return;
21786 if (!COMPARISON_P (x))
21788 output_operand_lossage ("invalid operand for code '%c'", code);
21789 return;
21792 fputs (arm_condition_codes[get_arm_condition_code (x)],
21793 stream);
21794 return;
21796 case 'D':
21797 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21798 want to do that. */
21799 if (x == const_true_rtx)
21801 output_operand_lossage ("instruction never executed");
21802 return;
21804 if (!COMPARISON_P (x))
21806 output_operand_lossage ("invalid operand for code '%c'", code);
21807 return;
21810 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21811 (get_arm_condition_code (x))],
21812 stream);
21813 return;
21815 case 's':
21816 case 'V':
21817 case 'W':
21818 case 'X':
21819 case 'Y':
21820 case 'Z':
21821 /* Former Maverick support, removed after GCC-4.7. */
21822 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21823 return;
21825 case 'U':
21826 if (!REG_P (x)
21827 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21828 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21829 /* Bad value for wCG register number. */
21831 output_operand_lossage ("invalid operand for code '%c'", code);
21832 return;
21835 else
21836 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21837 return;
21839 /* Print an iWMMXt control register name. */
21840 case 'w':
21841 if (!CONST_INT_P (x)
21842 || INTVAL (x) < 0
21843 || INTVAL (x) >= 16)
21844 /* Bad value for wC register number. */
21846 output_operand_lossage ("invalid operand for code '%c'", code);
21847 return;
21850 else
21852 static const char * wc_reg_names [16] =
21854 "wCID", "wCon", "wCSSF", "wCASF",
21855 "wC4", "wC5", "wC6", "wC7",
21856 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21857 "wC12", "wC13", "wC14", "wC15"
21860 fputs (wc_reg_names [INTVAL (x)], stream);
21862 return;
21864 /* Print the high single-precision register of a VFP double-precision
21865 register. */
21866 case 'p':
21868 machine_mode mode = GET_MODE (x);
21869 int regno;
21871 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21873 output_operand_lossage ("invalid operand for code '%c'", code);
21874 return;
21877 regno = REGNO (x);
21878 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21880 output_operand_lossage ("invalid operand for code '%c'", code);
21881 return;
21884 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21886 return;
21888 /* Print a VFP/Neon double precision or quad precision register name. */
21889 case 'P':
21890 case 'q':
21892 machine_mode mode = GET_MODE (x);
21893 int is_quad = (code == 'q');
21894 int regno;
21896 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21898 output_operand_lossage ("invalid operand for code '%c'", code);
21899 return;
21902 if (!REG_P (x)
21903 || !IS_VFP_REGNUM (REGNO (x)))
21905 output_operand_lossage ("invalid operand for code '%c'", code);
21906 return;
21909 regno = REGNO (x);
21910 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21911 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21913 output_operand_lossage ("invalid operand for code '%c'", code);
21914 return;
21917 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21918 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21920 return;
21922 /* These two codes print the low/high doubleword register of a Neon quad
21923 register, respectively. For pair-structure types, can also print
21924 low/high quadword registers. */
21925 case 'e':
21926 case 'f':
21928 machine_mode mode = GET_MODE (x);
21929 int regno;
21931 if ((GET_MODE_SIZE (mode) != 16
21932 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21934 output_operand_lossage ("invalid operand for code '%c'", code);
21935 return;
21938 regno = REGNO (x);
21939 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21941 output_operand_lossage ("invalid operand for code '%c'", code);
21942 return;
21945 if (GET_MODE_SIZE (mode) == 16)
21946 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21947 + (code == 'f' ? 1 : 0));
21948 else
21949 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21950 + (code == 'f' ? 1 : 0));
21952 return;
21954 /* Print a VFPv3 floating-point constant, represented as an integer
21955 index. */
21956 case 'G':
21958 int index = vfp3_const_double_index (x);
21959 gcc_assert (index != -1);
21960 fprintf (stream, "%d", index);
21962 return;
21964 /* Print bits representing opcode features for Neon.
21966 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21967 and polynomials as unsigned.
21969 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21971 Bit 2 is 1 for rounding functions, 0 otherwise. */
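/* Decoding example (added for clarity): for a signed, rounding integer
   operation the builtin expander passes bits = 5 (binary 101): bit 0 set
   means signed, bit 1 clear means ordinary integer, bit 2 set means
   rounding.  '%T' then prints 's' ("uspf"[1]), '%F' prints 'i', '%t'
   prints 's' and '%O' prints "r"; together these supply the type letter
   and the rounding marker in a mnemonic such as "vrshl.s32".  */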
21973 /* Identify the type as 's', 'u', 'p' or 'f'. */
21974 case 'T':
21976 HOST_WIDE_INT bits = INTVAL (x);
21977 fputc ("uspf"[bits & 3], stream);
21979 return;
21981 /* Likewise, but signed and unsigned integers are both 'i'. */
21982 case 'F':
21984 HOST_WIDE_INT bits = INTVAL (x);
21985 fputc ("iipf"[bits & 3], stream);
21987 return;
21989 /* As for 'T', but emit 'u' instead of 'p'. */
21990 case 't':
21992 HOST_WIDE_INT bits = INTVAL (x);
21993 fputc ("usuf"[bits & 3], stream);
21995 return;
21997 /* Bit 2: rounding (vs none). */
21998 case 'O':
22000 HOST_WIDE_INT bits = INTVAL (x);
22001 fputs ((bits & 4) != 0 ? "r" : "", stream);
22003 return;
22005 /* Memory operand for vld1/vst1 instruction. */
22006 case 'A':
22008 rtx addr;
22009 bool postinc = FALSE;
22010 rtx postinc_reg = NULL;
22011 unsigned align, memsize, align_bits;
22013 gcc_assert (MEM_P (x));
22014 addr = XEXP (x, 0);
22015 if (GET_CODE (addr) == POST_INC)
22017 postinc = 1;
22018 addr = XEXP (addr, 0);
22020 if (GET_CODE (addr) == POST_MODIFY)
22022 postinc_reg = XEXP (XEXP (addr, 1), 1);
22023 addr = XEXP (addr, 0);
22025 asm_fprintf (stream, "[%r", REGNO (addr));
22027 /* We know the alignment of this access, so we can emit a hint in the
22028 instruction (for some alignments) as an aid to the memory subsystem
22029 of the target. */
22030 align = MEM_ALIGN (x) >> 3;
22031 memsize = MEM_SIZE (x);
22033 /* Only certain alignment specifiers are supported by the hardware. */
22034 if (memsize == 32 && (align % 32) == 0)
22035 align_bits = 256;
22036 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22037 align_bits = 128;
22038 else if (memsize >= 8 && (align % 8) == 0)
22039 align_bits = 64;
22040 else
22041 align_bits = 0;
22043 if (align_bits != 0)
22044 asm_fprintf (stream, ":%d", align_bits);
22046 asm_fprintf (stream, "]");
22048 if (postinc)
22049 fputs("!", stream);
22050 if (postinc_reg)
22051 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22053 return;
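/* Example operand forms (for illustration): a 16-byte access through r0
   known to be 128-bit aligned prints as "[r0:128]"; an 8-byte POST_INC
   access through r1 with 64-bit alignment prints as "[r1:64]!"; a
   POST_MODIFY by a register prints as "[r2:64], r3".  Accesses smaller
   than 8 bytes, or without at least 64-bit alignment, get no ":<bits>"
   hint.  */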
22055 case 'C':
22057 rtx addr;
22059 gcc_assert (MEM_P (x));
22060 addr = XEXP (x, 0);
22061 gcc_assert (REG_P (addr));
22062 asm_fprintf (stream, "[%r]", REGNO (addr));
22064 return;
22066 /* Translate an S register number into a D register number and element index. */
22067 case 'y':
22069 machine_mode mode = GET_MODE (x);
22070 int regno;
22072 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22074 output_operand_lossage ("invalid operand for code '%c'", code);
22075 return;
22078 regno = REGNO (x);
22079 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22081 output_operand_lossage ("invalid operand for code '%c'", code);
22082 return;
22085 regno = regno - FIRST_VFP_REGNUM;
22086 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22088 return;
22090 case 'v':
22091 gcc_assert (CONST_DOUBLE_P (x));
22092 int result;
22093 result = vfp3_const_double_for_fract_bits (x);
22094 if (result == 0)
22095 result = vfp3_const_double_for_bits (x);
22096 fprintf (stream, "#%d", result);
22097 return;
22099 /* Register specifier for vld1.16/vst1.16. Translate the S register
22100 number into a D register number and element index. */
22101 case 'z':
22103 machine_mode mode = GET_MODE (x);
22104 int regno;
22106 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 regno = REGNO (x);
22113 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 regno = regno - FIRST_VFP_REGNUM;
22120 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22122 return;
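/* Example (for illustration): s5 overlays the high half of d2, so a %y
   operand prints it as "d2[1]" (32-bit lane 1) and a %z operand prints
   it as "d2[2]" (the 16-bit lane index used by vld1.16/vst1.16).  */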
22124 default:
22125 if (x == 0)
22127 output_operand_lossage ("missing operand");
22128 return;
22131 switch (GET_CODE (x))
22133 case REG:
22134 asm_fprintf (stream, "%r", REGNO (x));
22135 break;
22137 case MEM:
22138 output_memory_reference_mode = GET_MODE (x);
22139 output_address (XEXP (x, 0));
22140 break;
22142 case CONST_DOUBLE:
22144 char fpstr[20];
22145 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22146 sizeof (fpstr), 0, 1);
22147 fprintf (stream, "#%s", fpstr);
22149 break;
22151 default:
22152 gcc_assert (GET_CODE (x) != NEG);
22153 fputc ('#', stream);
22154 if (GET_CODE (x) == HIGH)
22156 fputs (":lower16:", stream);
22157 x = XEXP (x, 0);
22160 output_addr_const (stream, x);
22161 break;
22166 /* Target hook for printing a memory address. */
22167 static void
22168 arm_print_operand_address (FILE *stream, rtx x)
22170 if (TARGET_32BIT)
22172 int is_minus = GET_CODE (x) == MINUS;
22174 if (REG_P (x))
22175 asm_fprintf (stream, "[%r]", REGNO (x));
22176 else if (GET_CODE (x) == PLUS || is_minus)
22178 rtx base = XEXP (x, 0);
22179 rtx index = XEXP (x, 1);
22180 HOST_WIDE_INT offset = 0;
22181 if (!REG_P (base)
22182 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22184 /* Ensure that BASE is a register. */
22185 /* (one of them must be). */
22186 /* Also ensure the SP is not used as an index register. */
22187 rtx temp = base;
22188 base = index;
22189 index = temp;
22191 switch (GET_CODE (index))
22193 case CONST_INT:
22194 offset = INTVAL (index);
22195 if (is_minus)
22196 offset = -offset;
22197 asm_fprintf (stream, "[%r, #%wd]",
22198 REGNO (base), offset);
22199 break;
22201 case REG:
22202 asm_fprintf (stream, "[%r, %s%r]",
22203 REGNO (base), is_minus ? "-" : "",
22204 REGNO (index));
22205 break;
22207 case MULT:
22208 case ASHIFTRT:
22209 case LSHIFTRT:
22210 case ASHIFT:
22211 case ROTATERT:
22213 asm_fprintf (stream, "[%r, %s%r",
22214 REGNO (base), is_minus ? "-" : "",
22215 REGNO (XEXP (index, 0)));
22216 arm_print_operand (stream, index, 'S');
22217 fputs ("]", stream);
22218 break;
22221 default:
22222 gcc_unreachable ();
22225 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22226 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22228 extern machine_mode output_memory_reference_mode;
22230 gcc_assert (REG_P (XEXP (x, 0)));
22232 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22233 asm_fprintf (stream, "[%r, #%s%d]!",
22234 REGNO (XEXP (x, 0)),
22235 GET_CODE (x) == PRE_DEC ? "-" : "",
22236 GET_MODE_SIZE (output_memory_reference_mode));
22237 else
22238 asm_fprintf (stream, "[%r], #%s%d",
22239 REGNO (XEXP (x, 0)),
22240 GET_CODE (x) == POST_DEC ? "-" : "",
22241 GET_MODE_SIZE (output_memory_reference_mode));
22243 else if (GET_CODE (x) == PRE_MODIFY)
22245 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22246 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22247 asm_fprintf (stream, "#%wd]!",
22248 INTVAL (XEXP (XEXP (x, 1), 1)));
22249 else
22250 asm_fprintf (stream, "%r]!",
22251 REGNO (XEXP (XEXP (x, 1), 1)));
22253 else if (GET_CODE (x) == POST_MODIFY)
22255 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22256 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22257 asm_fprintf (stream, "#%wd",
22258 INTVAL (XEXP (XEXP (x, 1), 1)));
22259 else
22260 asm_fprintf (stream, "%r",
22261 REGNO (XEXP (XEXP (x, 1), 1)));
22263 else output_addr_const (stream, x);
22265 else
22267 if (REG_P (x))
22268 asm_fprintf (stream, "[%r]", REGNO (x));
22269 else if (GET_CODE (x) == POST_INC)
22270 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22271 else if (GET_CODE (x) == PLUS)
22273 gcc_assert (REG_P (XEXP (x, 0)));
22274 if (CONST_INT_P (XEXP (x, 1)))
22275 asm_fprintf (stream, "[%r, #%wd]",
22276 REGNO (XEXP (x, 0)),
22277 INTVAL (XEXP (x, 1)));
22278 else
22279 asm_fprintf (stream, "[%r, %r]",
22280 REGNO (XEXP (x, 0)),
22281 REGNO (XEXP (x, 1)));
22283 else
22284 output_addr_const (stream, x);
22288 /* Target hook for indicating whether a punctuation character for
22289 TARGET_PRINT_OPERAND is valid. */
22290 static bool
22291 arm_print_operand_punct_valid_p (unsigned char code)
22293 return (code == '@' || code == '|' || code == '.'
22294 || code == '(' || code == ')' || code == '#'
22295 || (TARGET_32BIT && (code == '?'))
22296 || (TARGET_THUMB2 && (code == '!'))
22297 || (TARGET_THUMB && (code == '_')));
22300 /* Target hook for assembling integer objects. The ARM version needs to
22301 handle word-sized values specially. */
22302 static bool
22303 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22305 machine_mode mode;
22307 if (size == UNITS_PER_WORD && aligned_p)
22309 fputs ("\t.word\t", asm_out_file);
22310 output_addr_const (asm_out_file, x);
22312 /* Mark symbols as position independent. We only do this in the
22313 .text segment, not in the .data segment. */
22314 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22315 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22317 /* See legitimize_pic_address for an explanation of the
22318 TARGET_VXWORKS_RTP check. */
22319 if (!arm_pic_data_is_text_relative
22320 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22321 fputs ("(GOT)", asm_out_file);
22322 else
22323 fputs ("(GOTOFF)", asm_out_file);
22325 fputc ('\n', asm_out_file);
22326 return true;
22329 mode = GET_MODE (x);
22331 if (arm_vector_mode_supported_p (mode))
22333 int i, units;
22335 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22337 units = CONST_VECTOR_NUNITS (x);
22338 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22340 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22341 for (i = 0; i < units; i++)
22343 rtx elt = CONST_VECTOR_ELT (x, i);
22344 assemble_integer
22345 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22347 else
22348 for (i = 0; i < units; i++)
22350 rtx elt = CONST_VECTOR_ELT (x, i);
22351 REAL_VALUE_TYPE rval;
22353 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22355 assemble_real
22356 (rval, GET_MODE_INNER (mode),
22357 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22360 return true;
22363 return default_assemble_integer (x, size, aligned_p);
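/* For illustration: when emitting a constant-pool word under -fPIC, a
   reference to a local symbol "sym" comes out as "\t.word\tsym(GOTOFF)"
   if PIC data is assumed to be text-relative, while a non-local symbol
   (or a build where that assumption is disabled) uses
   "\t.word\tsym(GOT)" instead.  */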
22366 static void
22367 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22369 section *s;
22371 if (!TARGET_AAPCS_BASED)
22373 (is_ctor ?
22374 default_named_section_asm_out_constructor
22375 : default_named_section_asm_out_destructor) (symbol, priority);
22376 return;
22379 /* Put these in the .init_array section, using a special relocation. */
22380 if (priority != DEFAULT_INIT_PRIORITY)
22382 char buf[18];
22383 sprintf (buf, "%s.%.5u",
22384 is_ctor ? ".init_array" : ".fini_array",
22385 priority);
22386 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22388 else if (is_ctor)
22389 s = ctors_section;
22390 else
22391 s = dtors_section;
22393 switch_to_section (s);
22394 assemble_align (POINTER_SIZE);
22395 fputs ("\t.word\t", asm_out_file);
22396 output_addr_const (asm_out_file, symbol);
22397 fputs ("(target1)\n", asm_out_file);
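/* For example (illustrative; "my_ctor" is a made-up name): a constructor
   registered with priority 101 is placed in section ".init_array.00101"
   (the "%.5u" zero-pads the priority) and emitted as
       .word   my_ctor(target1)
   so the (target1) annotation lets the linker resolve the entry as either
   an absolute or a relative address.  */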
22400 /* Add a function to the list of static constructors. */
22402 static void
22403 arm_elf_asm_constructor (rtx symbol, int priority)
22405 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22408 /* Add a function to the list of static destructors. */
22410 static void
22411 arm_elf_asm_destructor (rtx symbol, int priority)
22413 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22416 /* A finite state machine takes care of noticing whether or not instructions
22417 can be conditionally executed, and thus decreases execution time and code
22418 size by deleting branch instructions. The fsm is controlled by
22419 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22421 /* The states of the fsm controlling condition codes are:
22422 0: normal, do nothing special
22423 1: make ASM_OUTPUT_OPCODE not output this instruction
22424 2: make ASM_OUTPUT_OPCODE not output this instruction
22425 3: make instructions conditional
22426 4: make instructions conditional
22428 State transitions (state->state by whom under condition):
22429 0 -> 1 final_prescan_insn if the `target' is a label
22430 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22431 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22432 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22433 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22434 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22435 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22436 (the target insn is arm_target_insn).
22438 If the jump clobbers the conditions then we use states 2 and 4.
22440 A similar thing can be done with conditional return insns.
22442 XXX In case the `target' is an unconditional branch, this conditionalising
22443 of the instructions always reduces code size, but not always execution
22444 time. But then, I want to reduce the code size to somewhere near what
22445 /bin/cc produces. */
22447 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22448 instructions. When a COND_EXEC instruction is seen the subsequent
22449 instructions are scanned so that multiple conditional instructions can be
22450 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22451 specify the length and true/false mask for the IT block. These will be
22452 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
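/* Worked example (for exposition; the exact output depends on the
   surrounding code and options).  In ARM state, for a fragment compiled to

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   arm_final_prescan_insn sees the conditional branch over a single
   suitable insn and enters state 1; ASM_OUTPUT_OPCODE suppresses the
   branch and moves to state 3, so the skipped insn is emitted as
   "addne r1, r1, #1"; reaching .L1 returns the state to 0.  */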
22454 /* Returns the index of the ARM condition code string in
22455 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22456 COMPARISON should be an rtx like `(eq (...) (...))'. */
22458 enum arm_cond_code
22459 maybe_get_arm_condition_code (rtx comparison)
22461 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22462 enum arm_cond_code code;
22463 enum rtx_code comp_code = GET_CODE (comparison);
22465 if (GET_MODE_CLASS (mode) != MODE_CC)
22466 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22467 XEXP (comparison, 1));
22469 switch (mode)
22471 case CC_DNEmode: code = ARM_NE; goto dominance;
22472 case CC_DEQmode: code = ARM_EQ; goto dominance;
22473 case CC_DGEmode: code = ARM_GE; goto dominance;
22474 case CC_DGTmode: code = ARM_GT; goto dominance;
22475 case CC_DLEmode: code = ARM_LE; goto dominance;
22476 case CC_DLTmode: code = ARM_LT; goto dominance;
22477 case CC_DGEUmode: code = ARM_CS; goto dominance;
22478 case CC_DGTUmode: code = ARM_HI; goto dominance;
22479 case CC_DLEUmode: code = ARM_LS; goto dominance;
22480 case CC_DLTUmode: code = ARM_CC;
22482 dominance:
22483 if (comp_code == EQ)
22484 return ARM_INVERSE_CONDITION_CODE (code);
22485 if (comp_code == NE)
22486 return code;
22487 return ARM_NV;
22489 case CC_NOOVmode:
22490 switch (comp_code)
22492 case NE: return ARM_NE;
22493 case EQ: return ARM_EQ;
22494 case GE: return ARM_PL;
22495 case LT: return ARM_MI;
22496 default: return ARM_NV;
22499 case CC_Zmode:
22500 switch (comp_code)
22502 case NE: return ARM_NE;
22503 case EQ: return ARM_EQ;
22504 default: return ARM_NV;
22507 case CC_Nmode:
22508 switch (comp_code)
22510 case NE: return ARM_MI;
22511 case EQ: return ARM_PL;
22512 default: return ARM_NV;
22515 case CCFPEmode:
22516 case CCFPmode:
22517 /* We can handle all cases except UNEQ and LTGT. */
22518 switch (comp_code)
22520 case GE: return ARM_GE;
22521 case GT: return ARM_GT;
22522 case LE: return ARM_LS;
22523 case LT: return ARM_MI;
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case ORDERED: return ARM_VC;
22527 case UNORDERED: return ARM_VS;
22528 case UNLT: return ARM_LT;
22529 case UNLE: return ARM_LE;
22530 case UNGT: return ARM_HI;
22531 case UNGE: return ARM_PL;
22532 /* UNEQ and LTGT do not have a representation. */
22533 case UNEQ: /* Fall through. */
22534 case LTGT: /* Fall through. */
22535 default: return ARM_NV;
22538 case CC_SWPmode:
22539 switch (comp_code)
22541 case NE: return ARM_NE;
22542 case EQ: return ARM_EQ;
22543 case GE: return ARM_LE;
22544 case GT: return ARM_LT;
22545 case LE: return ARM_GE;
22546 case LT: return ARM_GT;
22547 case GEU: return ARM_LS;
22548 case GTU: return ARM_CC;
22549 case LEU: return ARM_CS;
22550 case LTU: return ARM_HI;
22551 default: return ARM_NV;
22554 case CC_Cmode:
22555 switch (comp_code)
22557 case LTU: return ARM_CS;
22558 case GEU: return ARM_CC;
22559 default: return ARM_NV;
22562 case CC_CZmode:
22563 switch (comp_code)
22565 case NE: return ARM_NE;
22566 case EQ: return ARM_EQ;
22567 case GEU: return ARM_CS;
22568 case GTU: return ARM_HI;
22569 case LEU: return ARM_LS;
22570 case LTU: return ARM_CC;
22571 default: return ARM_NV;
22574 case CC_NCVmode:
22575 switch (comp_code)
22577 case GE: return ARM_GE;
22578 case LT: return ARM_LT;
22579 case GEU: return ARM_CS;
22580 case LTU: return ARM_CC;
22581 default: return ARM_NV;
22584 case CCmode:
22585 switch (comp_code)
22587 case NE: return ARM_NE;
22588 case EQ: return ARM_EQ;
22589 case GE: return ARM_GE;
22590 case GT: return ARM_GT;
22591 case LE: return ARM_LE;
22592 case LT: return ARM_LT;
22593 case GEU: return ARM_CS;
22594 case GTU: return ARM_HI;
22595 case LEU: return ARM_LS;
22596 case LTU: return ARM_CC;
22597 default: return ARM_NV;
22600 default: gcc_unreachable ();
22604 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22605 static enum arm_cond_code
22606 get_arm_condition_code (rtx comparison)
22608 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22609 gcc_assert (code != ARM_NV);
22610 return code;
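/* Usage example (for exposition): for the comparison rtx
   (ne (reg:CC_Z CC_REGNUM) (const_int 0)) this returns ARM_NE, which
   indexes arm_condition_codes[] to give the "ne" suffix appended to a
   conditionally executed instruction.  */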
22613 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22614 instructions. */
22615 void
22616 thumb2_final_prescan_insn (rtx_insn *insn)
22618 rtx_insn *first_insn = insn;
22619 rtx body = PATTERN (insn);
22620 rtx predicate;
22621 enum arm_cond_code code;
22622 int n;
22623 int mask;
22624 int max;
22626 /* max_insns_skipped in the tune was already taken into account in the
22627 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22628 just emit the IT blocks as we can. It does not make sense to split
22629 the IT blocks. */
22630 max = MAX_INSN_PER_IT_BLOCK;
22632 /* Remove the previous insn from the count of insns to be output. */
22633 if (arm_condexec_count)
22634 arm_condexec_count--;
22636 /* Nothing to do if we are already inside a conditional block. */
22637 if (arm_condexec_count)
22638 return;
22640 if (GET_CODE (body) != COND_EXEC)
22641 return;
22643 /* Conditional jumps are implemented directly. */
22644 if (JUMP_P (insn))
22645 return;
22647 predicate = COND_EXEC_TEST (body);
22648 arm_current_cc = get_arm_condition_code (predicate);
22650 n = get_attr_ce_count (insn);
22651 arm_condexec_count = 1;
22652 arm_condexec_mask = (1 << n) - 1;
22653 arm_condexec_masklen = n;
22654 /* See if subsequent instructions can be combined into the same block. */
22655 for (;;)
22657 insn = next_nonnote_insn (insn);
22659 /* Jumping into the middle of an IT block is illegal, so a label or
22660 barrier terminates the block. */
22661 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22662 break;
22664 body = PATTERN (insn);
22665 /* USE and CLOBBER aren't really insns, so just skip them. */
22666 if (GET_CODE (body) == USE
22667 || GET_CODE (body) == CLOBBER)
22668 continue;
22670 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22671 if (GET_CODE (body) != COND_EXEC)
22672 break;
22673 /* Maximum number of conditionally executed instructions in a block. */
22674 n = get_attr_ce_count (insn);
22675 if (arm_condexec_masklen + n > max)
22676 break;
22678 predicate = COND_EXEC_TEST (body);
22679 code = get_arm_condition_code (predicate);
22680 mask = (1 << n) - 1;
22681 if (arm_current_cc == code)
22682 arm_condexec_mask |= (mask << arm_condexec_masklen);
22683 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22684 break;
22686 arm_condexec_count++;
22687 arm_condexec_masklen += n;
22689 /* A jump must be the last instruction in a conditional block. */
22690 if (JUMP_P (insn))
22691 break;
22693 /* Restore recog_data (getting the attributes of other insns can
22694 destroy this array, but final.c assumes that it remains intact
22695 across this call). */
22696 extract_constrain_insn_cached (first_insn);
22699 void
22700 arm_final_prescan_insn (rtx_insn *insn)
22702 /* BODY will hold the body of INSN. */
22703 rtx body = PATTERN (insn);
22705 /* This will be 1 if trying to repeat the trick, and things need to be
22706 reversed if it appears to fail. */
22707 int reverse = 0;
22709 /* If we start with a return insn, we only succeed if we find another one. */
22710 int seeking_return = 0;
22711 enum rtx_code return_code = UNKNOWN;
22713 /* START_INSN will hold the insn from where we start looking. This is the
22714 first insn after the following code_label if REVERSE is true. */
22715 rtx_insn *start_insn = insn;
22717 /* If in state 4, check if the target branch is reached, in order to
22718 change back to state 0. */
22719 if (arm_ccfsm_state == 4)
22721 if (insn == arm_target_insn)
22723 arm_target_insn = NULL;
22724 arm_ccfsm_state = 0;
22726 return;
22729 /* If in state 3, it is possible to repeat the trick, if this insn is an
22730 unconditional branch to a label, and immediately following this branch
22731 is the previous target label which is only used once, and the label this
22732 branch jumps to is not too far off. */
22733 if (arm_ccfsm_state == 3)
22735 if (simplejump_p (insn))
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (BARRIER_P (start_insn))
22740 /* XXX Isn't this always a barrier? */
22741 start_insn = next_nonnote_insn (start_insn);
22743 if (LABEL_P (start_insn)
22744 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22745 && LABEL_NUSES (start_insn) == 1)
22746 reverse = TRUE;
22747 else
22748 return;
22750 else if (ANY_RETURN_P (body))
22752 start_insn = next_nonnote_insn (start_insn);
22753 if (BARRIER_P (start_insn))
22754 start_insn = next_nonnote_insn (start_insn);
22755 if (LABEL_P (start_insn)
22756 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22757 && LABEL_NUSES (start_insn) == 1)
22759 reverse = TRUE;
22760 seeking_return = 1;
22761 return_code = GET_CODE (body);
22763 else
22764 return;
22766 else
22767 return;
22770 gcc_assert (!arm_ccfsm_state || reverse);
22771 if (!JUMP_P (insn))
22772 return;
22774 /* This jump might be paralleled with a clobber of the condition codes;
22775 the jump should always come first. */
22776 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22777 body = XVECEXP (body, 0, 0);
22779 if (reverse
22780 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22781 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22783 int insns_skipped;
22784 int fail = FALSE, succeed = FALSE;
22785 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22786 int then_not_else = TRUE;
22787 rtx_insn *this_insn = start_insn;
22788 rtx label = 0;
22790 /* Register the insn jumped to. */
22791 if (reverse)
22793 if (!seeking_return)
22794 label = XEXP (SET_SRC (body), 0);
22796 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22797 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22798 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22800 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22801 then_not_else = FALSE;
22803 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22805 seeking_return = 1;
22806 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22808 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22810 seeking_return = 1;
22811 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22812 then_not_else = FALSE;
22814 else
22815 gcc_unreachable ();
22817 /* See how many insns this branch skips, and what kind of insns. If all
22818 insns are okay, and the label or unconditional branch to the same
22819 label is not too far away, succeed. */
22820 for (insns_skipped = 0;
22821 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22823 rtx scanbody;
22825 this_insn = next_nonnote_insn (this_insn);
22826 if (!this_insn)
22827 break;
22829 switch (GET_CODE (this_insn))
22831 case CODE_LABEL:
22832 /* Succeed if it is the target label, otherwise fail since
22833 control falls in from somewhere else. */
22834 if (this_insn == label)
22836 arm_ccfsm_state = 1;
22837 succeed = TRUE;
22839 else
22840 fail = TRUE;
22841 break;
22843 case BARRIER:
22844 /* Succeed if the following insn is the target label.
22845 Otherwise fail.
22846 If return insns are used then the last insn in a function
22847 will be a barrier. */
22848 this_insn = next_nonnote_insn (this_insn);
22849 if (this_insn && this_insn == label)
22851 arm_ccfsm_state = 1;
22852 succeed = TRUE;
22854 else
22855 fail = TRUE;
22856 break;
22858 case CALL_INSN:
22859 /* The AAPCS says that conditional calls should not be
22860 used since they make interworking inefficient (the
22861 linker can't transform BL<cond> into BLX). That's
22862 only a problem if the machine has BLX. */
22863 if (arm_arch5)
22865 fail = TRUE;
22866 break;
22869 /* Succeed if the following insn is the target label, or
22870 if the following two insns are a barrier and the
22871 target label. */
22872 this_insn = next_nonnote_insn (this_insn);
22873 if (this_insn && BARRIER_P (this_insn))
22874 this_insn = next_nonnote_insn (this_insn);
22876 if (this_insn && this_insn == label
22877 && insns_skipped < max_insns_skipped)
22879 arm_ccfsm_state = 1;
22880 succeed = TRUE;
22882 else
22883 fail = TRUE;
22884 break;
22886 case JUMP_INSN:
22887 /* If this is an unconditional branch to the same label, succeed.
22888 If it is to another label, do nothing. If it is conditional,
22889 fail. */
22890 /* XXX Probably, the tests for SET and the PC are
22891 unnecessary. */
22893 scanbody = PATTERN (this_insn);
22894 if (GET_CODE (scanbody) == SET
22895 && GET_CODE (SET_DEST (scanbody)) == PC)
22897 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22898 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22900 arm_ccfsm_state = 2;
22901 succeed = TRUE;
22903 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22904 fail = TRUE;
22906 /* Fail if a conditional return is undesirable (e.g. on a
22907 StrongARM), but still allow this if optimizing for size. */
22908 else if (GET_CODE (scanbody) == return_code
22909 && !use_return_insn (TRUE, NULL)
22910 && !optimize_size)
22911 fail = TRUE;
22912 else if (GET_CODE (scanbody) == return_code)
22914 arm_ccfsm_state = 2;
22915 succeed = TRUE;
22917 else if (GET_CODE (scanbody) == PARALLEL)
22919 switch (get_attr_conds (this_insn))
22921 case CONDS_NOCOND:
22922 break;
22923 default:
22924 fail = TRUE;
22925 break;
22928 else
22929 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22931 break;
22933 case INSN:
22934 /* Instructions using or affecting the condition codes make it
22935 fail. */
22936 scanbody = PATTERN (this_insn);
22937 if (!(GET_CODE (scanbody) == SET
22938 || GET_CODE (scanbody) == PARALLEL)
22939 || get_attr_conds (this_insn) != CONDS_NOCOND)
22940 fail = TRUE;
22941 break;
22943 default:
22944 break;
22947 if (succeed)
22949 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22950 arm_target_label = CODE_LABEL_NUMBER (label);
22951 else
22953 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22955 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22957 this_insn = next_nonnote_insn (this_insn);
22958 gcc_assert (!this_insn
22959 || (!BARRIER_P (this_insn)
22960 && !LABEL_P (this_insn)));
22962 if (!this_insn)
22964 /* Oh, dear! We ran off the end... give up. */
22965 extract_constrain_insn_cached (insn);
22966 arm_ccfsm_state = 0;
22967 arm_target_insn = NULL;
22968 return;
22970 arm_target_insn = this_insn;
22973 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22974 what it was. */
22975 if (!reverse)
22976 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22978 if (reverse || then_not_else)
22979 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22982 /* Restore recog_data (getting the attributes of other insns can
22983 destroy this array, but final.c assumes that it remains intact
22984 across this call). */
22985 extract_constrain_insn_cached (insn);
22989 /* Output IT instructions. */
22990 void
22991 thumb2_asm_output_opcode (FILE * stream)
22993 char buff[5];
22994 int n;
22996 if (arm_condexec_mask)
22998 for (n = 0; n < arm_condexec_masklen; n++)
22999 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23000 buff[n] = 0;
23001 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23002 arm_condition_codes[arm_current_cc]);
23003 arm_condexec_mask = 0;
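/* Worked example (for exposition): for three consecutive conditional
   insns predicated EQ, EQ, NE, thumb2_final_prescan_insn leaves
   arm_condexec_masklen == 3 and arm_condexec_mask == 3 (binary 011),
   so buff becomes "tte" and this function prints "itte\teq" ahead of
   the three predicated instructions.  */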
23007 /* Returns true if REGNO is a valid register
23008 for holding a quantity of type MODE. */
23009 int
23010 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23012 if (GET_MODE_CLASS (mode) == MODE_CC)
23013 return (regno == CC_REGNUM
23014 || (TARGET_HARD_FLOAT && TARGET_VFP
23015 && regno == VFPCC_REGNUM));
23017 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23018 return false;
23020 if (TARGET_THUMB1)
23021 /* For the Thumb we only allow values bigger than SImode in
23022 registers 0 - 6, so that there is always a second low
23023 register available to hold the upper part of the value.
23024 We probably ought to ensure that the register is the
23025 start of an even numbered register pair. */
23026 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23028 if (TARGET_HARD_FLOAT && TARGET_VFP
23029 && IS_VFP_REGNUM (regno))
23031 if (mode == SFmode || mode == SImode)
23032 return VFP_REGNO_OK_FOR_SINGLE (regno);
23034 if (mode == DFmode)
23035 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23037 /* VFP registers can hold HFmode values, but there is no point in
23038 putting them there unless we have hardware conversion insns. */
23039 if (mode == HFmode)
23040 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23042 if (TARGET_NEON)
23043 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23044 || (VALID_NEON_QREG_MODE (mode)
23045 && NEON_REGNO_OK_FOR_QUAD (regno))
23046 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23047 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23048 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23049 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23050 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23052 return FALSE;
23055 if (TARGET_REALLY_IWMMXT)
23057 if (IS_IWMMXT_GR_REGNUM (regno))
23058 return mode == SImode;
23060 if (IS_IWMMXT_REGNUM (regno))
23061 return VALID_IWMMXT_REG_MODE (mode);
23064 /* We allow almost any value to be stored in the general registers.
23065 Restrict doubleword quantities to even register pairs in ARM state
23066 so that we can use ldrd. Do not allow very large Neon structure
23067 opaque modes in general registers; they would use too many. */
23068 if (regno <= LAST_ARM_REGNUM)
23070 if (ARM_NUM_REGS (mode) > 4)
23071 return FALSE;
23073 if (TARGET_THUMB2)
23074 return TRUE;
23076 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23079 if (regno == FRAME_POINTER_REGNUM
23080 || regno == ARG_POINTER_REGNUM)
23081 /* We only allow integers in the fake hard registers. */
23082 return GET_MODE_CLASS (mode) == MODE_INT;
23084 return FALSE;
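/* Examples (for illustration): on a core with LDRD, a DImode value may
   live in {r0,r1} or {r2,r3} but not in {r1,r2}, because doubleword
   quantities are restricted to even/odd pairs in ARM state; large Neon
   structure modes such as CImode span more than four words and are
   therefore never allocated to core registers at all.  */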
23087 /* Implement MODES_TIEABLE_P. */
23089 bool
23090 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23092 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23093 return true;
23095 /* We specifically want to allow elements of "structure" modes to
23096 be tieable to the structure. This more general condition allows
23097 other rarer situations too. */
23098 if (TARGET_NEON
23099 && (VALID_NEON_DREG_MODE (mode1)
23100 || VALID_NEON_QREG_MODE (mode1)
23101 || VALID_NEON_STRUCT_MODE (mode1))
23102 && (VALID_NEON_DREG_MODE (mode2)
23103 || VALID_NEON_QREG_MODE (mode2)
23104 || VALID_NEON_STRUCT_MODE (mode2)))
23105 return true;
23107 return false;
23110 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23111 not used in arm mode. */
23113 enum reg_class
23114 arm_regno_class (int regno)
23116 if (regno == PC_REGNUM)
23117 return NO_REGS;
23119 if (TARGET_THUMB1)
23121 if (regno == STACK_POINTER_REGNUM)
23122 return STACK_REG;
23123 if (regno == CC_REGNUM)
23124 return CC_REG;
23125 if (regno < 8)
23126 return LO_REGS;
23127 return HI_REGS;
23130 if (TARGET_THUMB2 && regno < 8)
23131 return LO_REGS;
23133 if ( regno <= LAST_ARM_REGNUM
23134 || regno == FRAME_POINTER_REGNUM
23135 || regno == ARG_POINTER_REGNUM)
23136 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23138 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23139 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23141 if (IS_VFP_REGNUM (regno))
23143 if (regno <= D7_VFP_REGNUM)
23144 return VFP_D0_D7_REGS;
23145 else if (regno <= LAST_LO_VFP_REGNUM)
23146 return VFP_LO_REGS;
23147 else
23148 return VFP_HI_REGS;
23151 if (IS_IWMMXT_REGNUM (regno))
23152 return IWMMXT_REGS;
23154 if (IS_IWMMXT_GR_REGNUM (regno))
23155 return IWMMXT_GR_REGS;
23157 return NO_REGS;
23160 /* Handle a special case when computing the offset
23161 of an argument from the frame pointer. */
23162 int
23163 arm_debugger_arg_offset (int value, rtx addr)
23165 rtx_insn *insn;
23167 /* We are only interested if dbxout_parms() failed to compute the offset. */
23168 if (value != 0)
23169 return 0;
23171 /* We can only cope with the case where the address is held in a register. */
23172 if (!REG_P (addr))
23173 return 0;
23175 /* If we are using the frame pointer to point at the argument, then
23176 an offset of 0 is correct. */
23177 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23178 return 0;
23180 /* If we are using the stack pointer to point at the
23181 argument, then an offset of 0 is correct. */
23182 /* ??? Check this is consistent with thumb2 frame layout. */
23183 if ((TARGET_THUMB || !frame_pointer_needed)
23184 && REGNO (addr) == SP_REGNUM)
23185 return 0;
23187 /* Oh dear. The argument is pointed to by a register rather
23188 than being held in a register, or being stored at a known
23189 offset from the frame pointer. Since GDB only understands
23190 those two kinds of argument we must translate the address
23191 held in the register into an offset from the frame pointer.
23192 We do this by searching through the insns for the function
23193 looking to see where this register gets its value. If the
23194 register is initialized from the frame pointer plus an offset
23195 then we are in luck and we can continue, otherwise we give up.
23197 This code is exercised by producing debugging information
23198 for a function with arguments like this:
23200 double func (double a, double b, int c, double d) {return d;}
23202 Without this code the stab for parameter 'd' will be set to
23203 an offset of 0 from the frame pointer, rather than 8. */
23205 /* The if() statement says:
23207 If the insn is a normal instruction
23208 and if the insn is setting the value in a register
23209 and if the register being set is the register holding the address of the argument
23210 and if the address is computed by an addition
23211 that involves adding to a register
23212 which is the frame pointer
23213 a constant integer
23215 then... */
23217 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23219 if ( NONJUMP_INSN_P (insn)
23220 && GET_CODE (PATTERN (insn)) == SET
23221 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23222 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23223 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23224 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23225 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23228 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23230 break;
23234 if (value == 0)
23236 debug_rtx (addr);
23237 warning (0, "unable to compute real location of stacked parameter");
23238 value = 8; /* XXX magic hack */
23241 return value;
23244 typedef enum {
23245 T_V8QI,
23246 T_V4HI,
23247 T_V4HF,
23248 T_V2SI,
23249 T_V2SF,
23250 T_DI,
23251 T_V16QI,
23252 T_V8HI,
23253 T_V4SI,
23254 T_V4SF,
23255 T_V2DI,
23256 T_TI,
23257 T_EI,
23258 T_OI,
23259 T_MAX /* Size of enum. Keep last. */
23260 } neon_builtin_type_mode;
23262 #define TYPE_MODE_BIT(X) (1 << (X))
23264 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23265 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23266 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23267 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23268 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23269 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23271 #define v8qi_UP T_V8QI
23272 #define v4hi_UP T_V4HI
23273 #define v4hf_UP T_V4HF
23274 #define v2si_UP T_V2SI
23275 #define v2sf_UP T_V2SF
23276 #define di_UP T_DI
23277 #define v16qi_UP T_V16QI
23278 #define v8hi_UP T_V8HI
23279 #define v4si_UP T_V4SI
23280 #define v4sf_UP T_V4SF
23281 #define v2di_UP T_V2DI
23282 #define ti_UP T_TI
23283 #define ei_UP T_EI
23284 #define oi_UP T_OI
23286 #define UP(X) X##_UP
23288 typedef enum {
23289 NEON_BINOP,
23290 NEON_TERNOP,
23291 NEON_UNOP,
23292 NEON_BSWAP,
23293 NEON_GETLANE,
23294 NEON_SETLANE,
23295 NEON_CREATE,
23296 NEON_RINT,
23297 NEON_COPYSIGNF,
23298 NEON_DUP,
23299 NEON_DUPLANE,
23300 NEON_COMBINE,
23301 NEON_SPLIT,
23302 NEON_LANEMUL,
23303 NEON_LANEMULL,
23304 NEON_LANEMULH,
23305 NEON_LANEMAC,
23306 NEON_SCALARMUL,
23307 NEON_SCALARMULL,
23308 NEON_SCALARMULH,
23309 NEON_SCALARMAC,
23310 NEON_CONVERT,
23311 NEON_FLOAT_WIDEN,
23312 NEON_FLOAT_NARROW,
23313 NEON_FIXCONV,
23314 NEON_SELECT,
23315 NEON_REINTERP,
23316 NEON_VTBL,
23317 NEON_VTBX,
23318 NEON_LOAD1,
23319 NEON_LOAD1LANE,
23320 NEON_STORE1,
23321 NEON_STORE1LANE,
23322 NEON_LOADSTRUCT,
23323 NEON_LOADSTRUCTLANE,
23324 NEON_STORESTRUCT,
23325 NEON_STORESTRUCTLANE,
23326 NEON_LOGICBINOP,
23327 NEON_SHIFTINSERT,
23328 NEON_SHIFTIMM,
23329 NEON_SHIFTACC
23330 } neon_itype;
23332 typedef struct {
23333 const char *name;
23334 const neon_itype itype;
23335 const neon_builtin_type_mode mode;
23336 const enum insn_code code;
23337 unsigned int fcode;
23338 } neon_builtin_datum;
23340 #define CF(N,X) CODE_FOR_neon_##N##X
23342 #define VAR1(T, N, A) \
23343 {#N, NEON_##T, UP (A), CF (N, A), 0}
23344 #define VAR2(T, N, A, B) \
23345 VAR1 (T, N, A), \
23346 {#N, NEON_##T, UP (B), CF (N, B), 0}
23347 #define VAR3(T, N, A, B, C) \
23348 VAR2 (T, N, A, B), \
23349 {#N, NEON_##T, UP (C), CF (N, C), 0}
23350 #define VAR4(T, N, A, B, C, D) \
23351 VAR3 (T, N, A, B, C), \
23352 {#N, NEON_##T, UP (D), CF (N, D), 0}
23353 #define VAR5(T, N, A, B, C, D, E) \
23354 VAR4 (T, N, A, B, C, D), \
23355 {#N, NEON_##T, UP (E), CF (N, E), 0}
23356 #define VAR6(T, N, A, B, C, D, E, F) \
23357 VAR5 (T, N, A, B, C, D, E), \
23358 {#N, NEON_##T, UP (F), CF (N, F), 0}
23359 #define VAR7(T, N, A, B, C, D, E, F, G) \
23360 VAR6 (T, N, A, B, C, D, E, F), \
23361 {#N, NEON_##T, UP (G), CF (N, G), 0}
23362 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23363 VAR7 (T, N, A, B, C, D, E, F, G), \
23364 {#N, NEON_##T, UP (H), CF (N, H), 0}
23365 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23366 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23367 {#N, NEON_##T, UP (I), CF (N, I), 0}
23368 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23369 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23370 {#N, NEON_##T, UP (J), CF (N, J), 0}
23372 /* The NEON builtin data can be found in arm_neon_builtins.def.
23373 The mode entries in the following table correspond to the "key" type of the
23374 instruction variant, i.e. equivalent to that which would be specified after
23375 the assembler mnemonic, which usually refers to the last vector operand.
23376 (Signed/unsigned/polynomial types are not differentiated between though, and
23377 are all mapped onto the same mode for a given element size.) The modes
23378 listed per instruction should be the same as those defined for that
23379 instruction's pattern in neon.md. */
23381 static neon_builtin_datum neon_builtin_data[] =
23383 #include "arm_neon_builtins.def"
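/* For illustration (hypothetical entry): a line such as
       VAR2 (BINOP, vadd, v8qi, v16qi)
   in arm_neon_builtins.def expands here into two table entries:
       {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
       {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}.
   The VARn and CF macros are redefined below so that the same .def file
   can also generate the matching ARM_BUILTIN_NEON_* enumerator
   values.  */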
23386 #undef CF
23387 #undef VAR1
23388 #undef VAR2
23389 #undef VAR3
23390 #undef VAR4
23391 #undef VAR5
23392 #undef VAR6
23393 #undef VAR7
23394 #undef VAR8
23395 #undef VAR9
23396 #undef VAR10
23398 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23399 #define VAR1(T, N, A) \
23400 CF (N, A)
23401 #define VAR2(T, N, A, B) \
23402 VAR1 (T, N, A), \
23403 CF (N, B)
23404 #define VAR3(T, N, A, B, C) \
23405 VAR2 (T, N, A, B), \
23406 CF (N, C)
23407 #define VAR4(T, N, A, B, C, D) \
23408 VAR3 (T, N, A, B, C), \
23409 CF (N, D)
23410 #define VAR5(T, N, A, B, C, D, E) \
23411 VAR4 (T, N, A, B, C, D), \
23412 CF (N, E)
23413 #define VAR6(T, N, A, B, C, D, E, F) \
23414 VAR5 (T, N, A, B, C, D, E), \
23415 CF (N, F)
23416 #define VAR7(T, N, A, B, C, D, E, F, G) \
23417 VAR6 (T, N, A, B, C, D, E, F), \
23418 CF (N, G)
23419 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23420 VAR7 (T, N, A, B, C, D, E, F, G), \
23421 CF (N, H)
23422 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23423 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23424 CF (N, I)
23425 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23426 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23427 CF (N, J)
23428 enum arm_builtins
23430 ARM_BUILTIN_GETWCGR0,
23431 ARM_BUILTIN_GETWCGR1,
23432 ARM_BUILTIN_GETWCGR2,
23433 ARM_BUILTIN_GETWCGR3,
23435 ARM_BUILTIN_SETWCGR0,
23436 ARM_BUILTIN_SETWCGR1,
23437 ARM_BUILTIN_SETWCGR2,
23438 ARM_BUILTIN_SETWCGR3,
23440 ARM_BUILTIN_WZERO,
23442 ARM_BUILTIN_WAVG2BR,
23443 ARM_BUILTIN_WAVG2HR,
23444 ARM_BUILTIN_WAVG2B,
23445 ARM_BUILTIN_WAVG2H,
23447 ARM_BUILTIN_WACCB,
23448 ARM_BUILTIN_WACCH,
23449 ARM_BUILTIN_WACCW,
23451 ARM_BUILTIN_WMACS,
23452 ARM_BUILTIN_WMACSZ,
23453 ARM_BUILTIN_WMACU,
23454 ARM_BUILTIN_WMACUZ,
23456 ARM_BUILTIN_WSADB,
23457 ARM_BUILTIN_WSADBZ,
23458 ARM_BUILTIN_WSADH,
23459 ARM_BUILTIN_WSADHZ,
23461 ARM_BUILTIN_WALIGNI,
23462 ARM_BUILTIN_WALIGNR0,
23463 ARM_BUILTIN_WALIGNR1,
23464 ARM_BUILTIN_WALIGNR2,
23465 ARM_BUILTIN_WALIGNR3,
23467 ARM_BUILTIN_TMIA,
23468 ARM_BUILTIN_TMIAPH,
23469 ARM_BUILTIN_TMIABB,
23470 ARM_BUILTIN_TMIABT,
23471 ARM_BUILTIN_TMIATB,
23472 ARM_BUILTIN_TMIATT,
23474 ARM_BUILTIN_TMOVMSKB,
23475 ARM_BUILTIN_TMOVMSKH,
23476 ARM_BUILTIN_TMOVMSKW,
23478 ARM_BUILTIN_TBCSTB,
23479 ARM_BUILTIN_TBCSTH,
23480 ARM_BUILTIN_TBCSTW,
23482 ARM_BUILTIN_WMADDS,
23483 ARM_BUILTIN_WMADDU,
23485 ARM_BUILTIN_WPACKHSS,
23486 ARM_BUILTIN_WPACKWSS,
23487 ARM_BUILTIN_WPACKDSS,
23488 ARM_BUILTIN_WPACKHUS,
23489 ARM_BUILTIN_WPACKWUS,
23490 ARM_BUILTIN_WPACKDUS,
23492 ARM_BUILTIN_WADDB,
23493 ARM_BUILTIN_WADDH,
23494 ARM_BUILTIN_WADDW,
23495 ARM_BUILTIN_WADDSSB,
23496 ARM_BUILTIN_WADDSSH,
23497 ARM_BUILTIN_WADDSSW,
23498 ARM_BUILTIN_WADDUSB,
23499 ARM_BUILTIN_WADDUSH,
23500 ARM_BUILTIN_WADDUSW,
23501 ARM_BUILTIN_WSUBB,
23502 ARM_BUILTIN_WSUBH,
23503 ARM_BUILTIN_WSUBW,
23504 ARM_BUILTIN_WSUBSSB,
23505 ARM_BUILTIN_WSUBSSH,
23506 ARM_BUILTIN_WSUBSSW,
23507 ARM_BUILTIN_WSUBUSB,
23508 ARM_BUILTIN_WSUBUSH,
23509 ARM_BUILTIN_WSUBUSW,
23511 ARM_BUILTIN_WAND,
23512 ARM_BUILTIN_WANDN,
23513 ARM_BUILTIN_WOR,
23514 ARM_BUILTIN_WXOR,
23516 ARM_BUILTIN_WCMPEQB,
23517 ARM_BUILTIN_WCMPEQH,
23518 ARM_BUILTIN_WCMPEQW,
23519 ARM_BUILTIN_WCMPGTUB,
23520 ARM_BUILTIN_WCMPGTUH,
23521 ARM_BUILTIN_WCMPGTUW,
23522 ARM_BUILTIN_WCMPGTSB,
23523 ARM_BUILTIN_WCMPGTSH,
23524 ARM_BUILTIN_WCMPGTSW,
23526 ARM_BUILTIN_TEXTRMSB,
23527 ARM_BUILTIN_TEXTRMSH,
23528 ARM_BUILTIN_TEXTRMSW,
23529 ARM_BUILTIN_TEXTRMUB,
23530 ARM_BUILTIN_TEXTRMUH,
23531 ARM_BUILTIN_TEXTRMUW,
23532 ARM_BUILTIN_TINSRB,
23533 ARM_BUILTIN_TINSRH,
23534 ARM_BUILTIN_TINSRW,
23536 ARM_BUILTIN_WMAXSW,
23537 ARM_BUILTIN_WMAXSH,
23538 ARM_BUILTIN_WMAXSB,
23539 ARM_BUILTIN_WMAXUW,
23540 ARM_BUILTIN_WMAXUH,
23541 ARM_BUILTIN_WMAXUB,
23542 ARM_BUILTIN_WMINSW,
23543 ARM_BUILTIN_WMINSH,
23544 ARM_BUILTIN_WMINSB,
23545 ARM_BUILTIN_WMINUW,
23546 ARM_BUILTIN_WMINUH,
23547 ARM_BUILTIN_WMINUB,
23549 ARM_BUILTIN_WMULUM,
23550 ARM_BUILTIN_WMULSM,
23551 ARM_BUILTIN_WMULUL,
23553 ARM_BUILTIN_PSADBH,
23554 ARM_BUILTIN_WSHUFH,
23556 ARM_BUILTIN_WSLLH,
23557 ARM_BUILTIN_WSLLW,
23558 ARM_BUILTIN_WSLLD,
23559 ARM_BUILTIN_WSRAH,
23560 ARM_BUILTIN_WSRAW,
23561 ARM_BUILTIN_WSRAD,
23562 ARM_BUILTIN_WSRLH,
23563 ARM_BUILTIN_WSRLW,
23564 ARM_BUILTIN_WSRLD,
23565 ARM_BUILTIN_WRORH,
23566 ARM_BUILTIN_WRORW,
23567 ARM_BUILTIN_WRORD,
23568 ARM_BUILTIN_WSLLHI,
23569 ARM_BUILTIN_WSLLWI,
23570 ARM_BUILTIN_WSLLDI,
23571 ARM_BUILTIN_WSRAHI,
23572 ARM_BUILTIN_WSRAWI,
23573 ARM_BUILTIN_WSRADI,
23574 ARM_BUILTIN_WSRLHI,
23575 ARM_BUILTIN_WSRLWI,
23576 ARM_BUILTIN_WSRLDI,
23577 ARM_BUILTIN_WRORHI,
23578 ARM_BUILTIN_WRORWI,
23579 ARM_BUILTIN_WRORDI,
23581 ARM_BUILTIN_WUNPCKIHB,
23582 ARM_BUILTIN_WUNPCKIHH,
23583 ARM_BUILTIN_WUNPCKIHW,
23584 ARM_BUILTIN_WUNPCKILB,
23585 ARM_BUILTIN_WUNPCKILH,
23586 ARM_BUILTIN_WUNPCKILW,
23588 ARM_BUILTIN_WUNPCKEHSB,
23589 ARM_BUILTIN_WUNPCKEHSH,
23590 ARM_BUILTIN_WUNPCKEHSW,
23591 ARM_BUILTIN_WUNPCKEHUB,
23592 ARM_BUILTIN_WUNPCKEHUH,
23593 ARM_BUILTIN_WUNPCKEHUW,
23594 ARM_BUILTIN_WUNPCKELSB,
23595 ARM_BUILTIN_WUNPCKELSH,
23596 ARM_BUILTIN_WUNPCKELSW,
23597 ARM_BUILTIN_WUNPCKELUB,
23598 ARM_BUILTIN_WUNPCKELUH,
23599 ARM_BUILTIN_WUNPCKELUW,
23601 ARM_BUILTIN_WABSB,
23602 ARM_BUILTIN_WABSH,
23603 ARM_BUILTIN_WABSW,
23605 ARM_BUILTIN_WADDSUBHX,
23606 ARM_BUILTIN_WSUBADDHX,
23608 ARM_BUILTIN_WABSDIFFB,
23609 ARM_BUILTIN_WABSDIFFH,
23610 ARM_BUILTIN_WABSDIFFW,
23612 ARM_BUILTIN_WADDCH,
23613 ARM_BUILTIN_WADDCW,
23615 ARM_BUILTIN_WAVG4,
23616 ARM_BUILTIN_WAVG4R,
23618 ARM_BUILTIN_WMADDSX,
23619 ARM_BUILTIN_WMADDUX,
23621 ARM_BUILTIN_WMADDSN,
23622 ARM_BUILTIN_WMADDUN,
23624 ARM_BUILTIN_WMULWSM,
23625 ARM_BUILTIN_WMULWUM,
23627 ARM_BUILTIN_WMULWSMR,
23628 ARM_BUILTIN_WMULWUMR,
23630 ARM_BUILTIN_WMULWL,
23632 ARM_BUILTIN_WMULSMR,
23633 ARM_BUILTIN_WMULUMR,
23635 ARM_BUILTIN_WQMULM,
23636 ARM_BUILTIN_WQMULMR,
23638 ARM_BUILTIN_WQMULWM,
23639 ARM_BUILTIN_WQMULWMR,
23641 ARM_BUILTIN_WADDBHUSM,
23642 ARM_BUILTIN_WADDBHUSL,
23644 ARM_BUILTIN_WQMIABB,
23645 ARM_BUILTIN_WQMIABT,
23646 ARM_BUILTIN_WQMIATB,
23647 ARM_BUILTIN_WQMIATT,
23649 ARM_BUILTIN_WQMIABBN,
23650 ARM_BUILTIN_WQMIABTN,
23651 ARM_BUILTIN_WQMIATBN,
23652 ARM_BUILTIN_WQMIATTN,
23654 ARM_BUILTIN_WMIABB,
23655 ARM_BUILTIN_WMIABT,
23656 ARM_BUILTIN_WMIATB,
23657 ARM_BUILTIN_WMIATT,
23659 ARM_BUILTIN_WMIABBN,
23660 ARM_BUILTIN_WMIABTN,
23661 ARM_BUILTIN_WMIATBN,
23662 ARM_BUILTIN_WMIATTN,
23664 ARM_BUILTIN_WMIAWBB,
23665 ARM_BUILTIN_WMIAWBT,
23666 ARM_BUILTIN_WMIAWTB,
23667 ARM_BUILTIN_WMIAWTT,
23669 ARM_BUILTIN_WMIAWBBN,
23670 ARM_BUILTIN_WMIAWBTN,
23671 ARM_BUILTIN_WMIAWTBN,
23672 ARM_BUILTIN_WMIAWTTN,
23674 ARM_BUILTIN_WMERGE,
23676 ARM_BUILTIN_CRC32B,
23677 ARM_BUILTIN_CRC32H,
23678 ARM_BUILTIN_CRC32W,
23679 ARM_BUILTIN_CRC32CB,
23680 ARM_BUILTIN_CRC32CH,
23681 ARM_BUILTIN_CRC32CW,
23683 ARM_BUILTIN_GET_FPSCR,
23684 ARM_BUILTIN_SET_FPSCR,
23686 #undef CRYPTO1
23687 #undef CRYPTO2
23688 #undef CRYPTO3
23690 #define CRYPTO1(L, U, M1, M2) \
23691 ARM_BUILTIN_CRYPTO_##U,
23692 #define CRYPTO2(L, U, M1, M2, M3) \
23693 ARM_BUILTIN_CRYPTO_##U,
23694 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23695 ARM_BUILTIN_CRYPTO_##U,
23697 #include "crypto.def"
23699 #undef CRYPTO1
23700 #undef CRYPTO2
23701 #undef CRYPTO3
23703 #include "arm_neon_builtins.def"
23705 ,ARM_BUILTIN_MAX
23708 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
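/* The NEON builtins therefore occupy the last ARRAY_SIZE (neon_builtin_data)
   function codes, so a NEON fcode maps back onto its table entry as
   neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE], mirroring the numbering
   used when the builtins are registered in arm_init_neon_builtins.  */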
23710 #undef CF
23711 #undef VAR1
23712 #undef VAR2
23713 #undef VAR3
23714 #undef VAR4
23715 #undef VAR5
23716 #undef VAR6
23717 #undef VAR7
23718 #undef VAR8
23719 #undef VAR9
23720 #undef VAR10
23722 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23724 #define NUM_DREG_TYPES 5
23725 #define NUM_QREG_TYPES 6
23727 static void
23728 arm_init_neon_builtins (void)
23730 unsigned int i, fcode;
23731 tree decl;
23733 tree neon_intQI_type_node;
23734 tree neon_intHI_type_node;
23735 tree neon_floatHF_type_node;
23736 tree neon_polyQI_type_node;
23737 tree neon_polyHI_type_node;
23738 tree neon_intSI_type_node;
23739 tree neon_intDI_type_node;
23740 tree neon_intUTI_type_node;
23741 tree neon_float_type_node;
23743 tree intQI_pointer_node;
23744 tree intHI_pointer_node;
23745 tree intSI_pointer_node;
23746 tree intDI_pointer_node;
23747 tree float_pointer_node;
23749 tree const_intQI_node;
23750 tree const_intHI_node;
23751 tree const_intSI_node;
23752 tree const_intDI_node;
23753 tree const_float_node;
23755 tree const_intQI_pointer_node;
23756 tree const_intHI_pointer_node;
23757 tree const_intSI_pointer_node;
23758 tree const_intDI_pointer_node;
23759 tree const_float_pointer_node;
23761 tree V8QI_type_node;
23762 tree V4HI_type_node;
23763 tree V4UHI_type_node;
23764 tree V4HF_type_node;
23765 tree V2SI_type_node;
23766 tree V2USI_type_node;
23767 tree V2SF_type_node;
23768 tree V16QI_type_node;
23769 tree V8HI_type_node;
23770 tree V8UHI_type_node;
23771 tree V4SI_type_node;
23772 tree V4USI_type_node;
23773 tree V4SF_type_node;
23774 tree V2DI_type_node;
23775 tree V2UDI_type_node;
23777 tree intUQI_type_node;
23778 tree intUHI_type_node;
23779 tree intUSI_type_node;
23780 tree intUDI_type_node;
23782 tree intEI_type_node;
23783 tree intOI_type_node;
23784 tree intCI_type_node;
23785 tree intXI_type_node;
23787 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23788 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23789 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23791 /* Create distinguished type nodes for NEON vector element types,
23792 and pointers to values of such types, so we can detect them later. */
23793 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23794 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23795 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23796 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23797 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23798 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23799 neon_float_type_node = make_node (REAL_TYPE);
23800 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23801 layout_type (neon_float_type_node);
23802 neon_floatHF_type_node = make_node (REAL_TYPE);
23803 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23804 layout_type (neon_floatHF_type_node);
23806 /* Define typedefs which exactly correspond to the modes we are basing vector
23807 types on. If you change these names you'll need to change
23808 the table used by arm_mangle_type too. */
23809 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23810 "__builtin_neon_qi");
23811 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23812 "__builtin_neon_hi");
23813 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23814 "__builtin_neon_hf");
23815 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23816 "__builtin_neon_si");
23817 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23818 "__builtin_neon_sf");
23819 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23820 "__builtin_neon_di");
23821 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23822 "__builtin_neon_poly8");
23823 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23824 "__builtin_neon_poly16");
23826 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23827 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23828 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23829 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23830 float_pointer_node = build_pointer_type (neon_float_type_node);
23832 /* Next create constant-qualified versions of the above types. */
23833 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23834 TYPE_QUAL_CONST);
23835 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23836 TYPE_QUAL_CONST);
23837 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23838 TYPE_QUAL_CONST);
23839 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23840 TYPE_QUAL_CONST);
23841 const_float_node = build_qualified_type (neon_float_type_node,
23842 TYPE_QUAL_CONST);
23844 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23845 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23846 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23847 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23848 const_float_pointer_node = build_pointer_type (const_float_node);
23850 /* Unsigned integer types for various mode sizes. */
23851 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23852 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23853 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23854 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23855 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23856 /* Now create vector types based on our NEON element types. */
23857 /* 64-bit vectors. */
23858 V8QI_type_node =
23859 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23860 V4HI_type_node =
23861 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23862 V4UHI_type_node =
23863 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23864 V4HF_type_node =
23865 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23866 V2SI_type_node =
23867 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23868 V2USI_type_node =
23869 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23870 V2SF_type_node =
23871 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23872 /* 128-bit vectors. */
23873 V16QI_type_node =
23874 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23875 V8HI_type_node =
23876 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23877 V8UHI_type_node =
23878 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23879 V4SI_type_node =
23880 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23881 V4USI_type_node =
23882 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23883 V4SF_type_node =
23884 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23885 V2DI_type_node =
23886 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23887 V2UDI_type_node =
23888 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23891 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23892 "__builtin_neon_uqi");
23893 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23894 "__builtin_neon_uhi");
23895 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23896 "__builtin_neon_usi");
23897 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23898 "__builtin_neon_udi");
23899 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23900 "__builtin_neon_poly64");
23901 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23902 "__builtin_neon_poly128");
23904 /* Opaque integer types for structures of vectors. */
23905 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23906 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23907 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23908 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23910 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23911 "__builtin_neon_ti");
23912 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23913 "__builtin_neon_ei");
23914 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23915 "__builtin_neon_oi");
23916 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23917 "__builtin_neon_ci");
23918 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23919 "__builtin_neon_xi");
23921 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23924 tree V16UQI_type_node =
23925 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23927 tree v16uqi_ftype_v16uqi
23928 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23930 tree v16uqi_ftype_v16uqi_v16uqi
23931 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23932 V16UQI_type_node, NULL_TREE);
23934 tree v4usi_ftype_v4usi
23935 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23937 tree v4usi_ftype_v4usi_v4usi
23938 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23939 V4USI_type_node, NULL_TREE);
23941 tree v4usi_ftype_v4usi_v4usi_v4usi
23942 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23943 V4USI_type_node, V4USI_type_node, NULL_TREE);
23945 tree uti_ftype_udi_udi
23946 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23947 intUDI_type_node, NULL_TREE);
23949 #undef CRYPTO1
23950 #undef CRYPTO2
23951 #undef CRYPTO3
23952 #undef C
23953 #undef N
23954 #undef CF
23955 #undef FT1
23956 #undef FT2
23957 #undef FT3
23959 #define C(U) \
23960 ARM_BUILTIN_CRYPTO_##U
23961 #define N(L) \
23962 "__builtin_arm_crypto_"#L
23963 #define FT1(R, A) \
23964 R##_ftype_##A
23965 #define FT2(R, A1, A2) \
23966 R##_ftype_##A1##_##A2
23967 #define FT3(R, A1, A2, A3) \
23968 R##_ftype_##A1##_##A2##_##A3
23969 #define CRYPTO1(L, U, R, A) \
23970 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23971 C (U), BUILT_IN_MD, \
23972 NULL, NULL_TREE);
23973 #define CRYPTO2(L, U, R, A1, A2) \
23974 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23975 C (U), BUILT_IN_MD, \
23976 NULL, NULL_TREE);
23978 #define CRYPTO3(L, U, R, A1, A2, A3) \
23979 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23980 C (U), BUILT_IN_MD, \
23981 NULL, NULL_TREE);
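/* As a sketch (the real entries live in crypto.def, and "aesd" is only
   illustrative here), a line such as

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   would expand at this point to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);

   with FT2 mapping the R/A1/A2 fields onto one of the function types
   built above.  */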
23982 #include "crypto.def"
23984 #undef CRYPTO1
23985 #undef CRYPTO2
23986 #undef CRYPTO3
23987 #undef C
23988 #undef N
23989 #undef FT1
23990 #undef FT2
23991 #undef FT3
23993 dreg_types[0] = V8QI_type_node;
23994 dreg_types[1] = V4HI_type_node;
23995 dreg_types[2] = V2SI_type_node;
23996 dreg_types[3] = V2SF_type_node;
23997 dreg_types[4] = neon_intDI_type_node;
23999 qreg_types[0] = V16QI_type_node;
24000 qreg_types[1] = V8HI_type_node;
24001 qreg_types[2] = V4SI_type_node;
24002 qreg_types[3] = V4SF_type_node;
24003 qreg_types[4] = V2DI_type_node;
24004 qreg_types[5] = neon_intUTI_type_node;
24006 for (i = 0; i < NUM_QREG_TYPES; i++)
24008 int j;
24009 for (j = 0; j < NUM_QREG_TYPES; j++)
24011 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24012 reinterp_ftype_dreg[i][j]
24013 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24015 reinterp_ftype_qreg[i][j]
24016 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24020 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24021 i < ARRAY_SIZE (neon_builtin_data);
24022 i++, fcode++)
24024 neon_builtin_datum *d = &neon_builtin_data[i];
24026 const char* const modenames[] = {
24027 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24028 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24029 "ti", "ei", "oi"
24031 char namebuf[60];
24032 tree ftype = NULL;
24033 int is_load = 0, is_store = 0;
24035 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24037 d->fcode = fcode;
24039 switch (d->itype)
24041 case NEON_LOAD1:
24042 case NEON_LOAD1LANE:
24043 case NEON_LOADSTRUCT:
24044 case NEON_LOADSTRUCTLANE:
24045 is_load = 1;
24046 /* Fall through. */
24047 case NEON_STORE1:
24048 case NEON_STORE1LANE:
24049 case NEON_STORESTRUCT:
24050 case NEON_STORESTRUCTLANE:
24051 if (!is_load)
24052 is_store = 1;
24053 /* Fall through. */
24054 case NEON_UNOP:
24055 case NEON_RINT:
24056 case NEON_BINOP:
24057 case NEON_LOGICBINOP:
24058 case NEON_SHIFTINSERT:
24059 case NEON_TERNOP:
24060 case NEON_GETLANE:
24061 case NEON_SETLANE:
24062 case NEON_CREATE:
24063 case NEON_DUP:
24064 case NEON_DUPLANE:
24065 case NEON_SHIFTIMM:
24066 case NEON_SHIFTACC:
24067 case NEON_COMBINE:
24068 case NEON_SPLIT:
24069 case NEON_CONVERT:
24070 case NEON_FIXCONV:
24071 case NEON_LANEMUL:
24072 case NEON_LANEMULL:
24073 case NEON_LANEMULH:
24074 case NEON_LANEMAC:
24075 case NEON_SCALARMUL:
24076 case NEON_SCALARMULL:
24077 case NEON_SCALARMULH:
24078 case NEON_SCALARMAC:
24079 case NEON_SELECT:
24080 case NEON_VTBL:
24081 case NEON_VTBX:
24083 int k;
24084 tree return_type = void_type_node, args = void_list_node;
24086 /* Build a function type directly from the insn_data for
24087 this builtin. The build_function_type() function takes
24088 care of removing duplicates for us. */
24089 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24091 tree eltype;
24093 if (is_load && k == 1)
24095 /* Neon load patterns always have the memory
24096 operand in the operand 1 position. */
24097 gcc_assert (insn_data[d->code].operand[k].predicate
24098 == neon_struct_operand);
24100 switch (d->mode)
24102 case T_V8QI:
24103 case T_V16QI:
24104 eltype = const_intQI_pointer_node;
24105 break;
24107 case T_V4HI:
24108 case T_V8HI:
24109 eltype = const_intHI_pointer_node;
24110 break;
24112 case T_V2SI:
24113 case T_V4SI:
24114 eltype = const_intSI_pointer_node;
24115 break;
24117 case T_V2SF:
24118 case T_V4SF:
24119 eltype = const_float_pointer_node;
24120 break;
24122 case T_DI:
24123 case T_V2DI:
24124 eltype = const_intDI_pointer_node;
24125 break;
24127 default: gcc_unreachable ();
24130 else if (is_store && k == 0)
24132 /* Similarly, Neon store patterns use operand 0 as
24133 the memory location to store to. */
24134 gcc_assert (insn_data[d->code].operand[k].predicate
24135 == neon_struct_operand);
24137 switch (d->mode)
24139 case T_V8QI:
24140 case T_V16QI:
24141 eltype = intQI_pointer_node;
24142 break;
24144 case T_V4HI:
24145 case T_V8HI:
24146 eltype = intHI_pointer_node;
24147 break;
24149 case T_V2SI:
24150 case T_V4SI:
24151 eltype = intSI_pointer_node;
24152 break;
24154 case T_V2SF:
24155 case T_V4SF:
24156 eltype = float_pointer_node;
24157 break;
24159 case T_DI:
24160 case T_V2DI:
24161 eltype = intDI_pointer_node;
24162 break;
24164 default: gcc_unreachable ();
24167 else
24169 switch (insn_data[d->code].operand[k].mode)
24171 case VOIDmode: eltype = void_type_node; break;
24172 /* Scalars. */
24173 case QImode: eltype = neon_intQI_type_node; break;
24174 case HImode: eltype = neon_intHI_type_node; break;
24175 case SImode: eltype = neon_intSI_type_node; break;
24176 case SFmode: eltype = neon_float_type_node; break;
24177 case DImode: eltype = neon_intDI_type_node; break;
24178 case TImode: eltype = intTI_type_node; break;
24179 case EImode: eltype = intEI_type_node; break;
24180 case OImode: eltype = intOI_type_node; break;
24181 case CImode: eltype = intCI_type_node; break;
24182 case XImode: eltype = intXI_type_node; break;
24183 /* 64-bit vectors. */
24184 case V8QImode: eltype = V8QI_type_node; break;
24185 case V4HImode: eltype = V4HI_type_node; break;
24186 case V2SImode: eltype = V2SI_type_node; break;
24187 case V2SFmode: eltype = V2SF_type_node; break;
24188 /* 128-bit vectors. */
24189 case V16QImode: eltype = V16QI_type_node; break;
24190 case V8HImode: eltype = V8HI_type_node; break;
24191 case V4SImode: eltype = V4SI_type_node; break;
24192 case V4SFmode: eltype = V4SF_type_node; break;
24193 case V2DImode: eltype = V2DI_type_node; break;
24194 default: gcc_unreachable ();
24198 if (k == 0 && !is_store)
24199 return_type = eltype;
24200 else
24201 args = tree_cons (NULL_TREE, eltype, args);
24204 ftype = build_function_type (return_type, args);
24206 break;
24208 case NEON_REINTERP:
24210 /* We iterate over NUM_DREG_TYPES doubleword types,
24211 then NUM_QREG_TYPES quadword types.
24212 V4HF is not a type used in reinterpret, so we translate
24213 d->mode to the correct index in reinterp_ftype_dreg. */
24214 bool qreg_p
24215 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24216 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24217 % NUM_QREG_TYPES;
24218 switch (insn_data[d->code].operand[0].mode)
24220 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24221 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24222 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24223 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24224 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24225 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24226 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24227 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24228 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24229 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24230 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24231 default: gcc_unreachable ();
24234 break;
24235 case NEON_FLOAT_WIDEN:
24237 tree eltype = NULL_TREE;
24238 tree return_type = NULL_TREE;
24240 switch (insn_data[d->code].operand[1].mode)
24242 case V4HFmode:
24243 eltype = V4HF_type_node;
24244 return_type = V4SF_type_node;
24245 break;
24246 default: gcc_unreachable ();
24248 ftype = build_function_type_list (return_type, eltype, NULL);
24249 break;
24251 case NEON_FLOAT_NARROW:
24253 tree eltype = NULL_TREE;
24254 tree return_type = NULL_TREE;
24256 switch (insn_data[d->code].operand[1].mode)
24258 case V4SFmode:
24259 eltype = V4SF_type_node;
24260 return_type = V4HF_type_node;
24261 break;
24262 default: gcc_unreachable ();
24264 ftype = build_function_type_list (return_type, eltype, NULL);
24265 break;
24267 case NEON_BSWAP:
24269 tree eltype = NULL_TREE;
24270 switch (insn_data[d->code].operand[1].mode)
24272 case V4HImode:
24273 eltype = V4UHI_type_node;
24274 break;
24275 case V8HImode:
24276 eltype = V8UHI_type_node;
24277 break;
24278 case V2SImode:
24279 eltype = V2USI_type_node;
24280 break;
24281 case V4SImode:
24282 eltype = V4USI_type_node;
24283 break;
24284 case V2DImode:
24285 eltype = V2UDI_type_node;
24286 break;
24287 default: gcc_unreachable ();
24289 ftype = build_function_type_list (eltype, eltype, NULL);
24290 break;
24292 case NEON_COPYSIGNF:
24294 tree eltype = NULL_TREE;
24295 switch (insn_data[d->code].operand[1].mode)
24297 case V2SFmode:
24298 eltype = V2SF_type_node;
24299 break;
24300 case V4SFmode:
24301 eltype = V4SF_type_node;
24302 break;
24303 default: gcc_unreachable ();
24305 ftype = build_function_type_list (eltype, eltype, NULL);
24306 break;
24308 default:
24309 gcc_unreachable ();
24312 gcc_assert (ftype != NULL);
24314 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24316 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24317 NULL_TREE);
24318 arm_builtin_decls[fcode] = decl;
24322 #undef NUM_DREG_TYPES
24323 #undef NUM_QREG_TYPES
24325 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24326 do \
24328 if ((MASK) & insn_flags) \
24330 tree bdecl; \
24331 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24332 BUILT_IN_MD, NULL, NULL_TREE); \
24333 arm_builtin_decls[CODE] = bdecl; \
24336 while (0)
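/* For example, the registration of the iWMMXt "wzero" builtin further down
   goes through this macro as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   which adds the builtin only when FL_IWMMXT is set in insn_flags and
   records its decl in arm_builtin_decls.  */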
24338 struct builtin_description
24340 const unsigned int mask;
24341 const enum insn_code icode;
24342 const char * const name;
24343 const enum arm_builtins code;
24344 const enum rtx_code comparison;
24345 const unsigned int flag;
24348 static const struct builtin_description bdesc_2arg[] =
24350 #define IWMMXT_BUILTIN(code, string, builtin) \
24351 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24352 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24354 #define IWMMXT2_BUILTIN(code, string, builtin) \
24355 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24356 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
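/* Each entry below expands to one builtin_description record; for instance
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) becomes

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   pairing the insn pattern with the user-visible builtin name.  */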
24358 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24359 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24360 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24361 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24362 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24363 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24364 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24365 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24366 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24367 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24368 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24369 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24370 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24371 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24372 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24373 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24374 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24375 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24376 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24377 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24378 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24379 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24380 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24381 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24382 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24383 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24384 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24385 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24386 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24387 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24388 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24389 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24390 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24391 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24392 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24393 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24394 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24395 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24396 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24397 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24398 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24399 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24400 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24401 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24402 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24403 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24404 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24405 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24406 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24407 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24408 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24409 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24410 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24411 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24412 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24413 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24414 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24415 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24416 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24417 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24418 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24419 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24420 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24421 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24422 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24423 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24424 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24425 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24426 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24427 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24428 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24429 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24430 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24431 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24432 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24433 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24434 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24435 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24437 #define IWMMXT_BUILTIN2(code, builtin) \
24438 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24440 #define IWMMXT2_BUILTIN2(code, builtin) \
24441 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24443 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24444 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24445 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24446 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24447 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24448 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24449 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24450 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24451 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24452 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24455 #define FP_BUILTIN(L, U) \
24456 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24457 UNKNOWN, 0},
24459 FP_BUILTIN (get_fpscr, GET_FPSCR)
24460 FP_BUILTIN (set_fpscr, SET_FPSCR)
24461 #undef FP_BUILTIN
24463 #define CRC32_BUILTIN(L, U) \
24464 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24465 UNKNOWN, 0},
24466 CRC32_BUILTIN (crc32b, CRC32B)
24467 CRC32_BUILTIN (crc32h, CRC32H)
24468 CRC32_BUILTIN (crc32w, CRC32W)
24469 CRC32_BUILTIN (crc32cb, CRC32CB)
24470 CRC32_BUILTIN (crc32ch, CRC32CH)
24471 CRC32_BUILTIN (crc32cw, CRC32CW)
24472 #undef CRC32_BUILTIN
24475 #define CRYPTO_BUILTIN(L, U) \
24476 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24477 UNKNOWN, 0},
24478 #undef CRYPTO1
24479 #undef CRYPTO2
24480 #undef CRYPTO3
24481 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24482 #define CRYPTO1(L, U, R, A)
24483 #define CRYPTO3(L, U, R, A1, A2, A3)
24484 #include "crypto.def"
24485 #undef CRYPTO1
24486 #undef CRYPTO2
24487 #undef CRYPTO3
24491 static const struct builtin_description bdesc_1arg[] =
24493 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24494 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24495 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24496 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24497 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24498 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24499 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24500 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24501 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24502 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24503 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24504 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24505 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24506 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24507 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24508 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24509 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24510 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24511 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24512 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24513 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24514 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24515 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24516 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24518 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24519 #define CRYPTO2(L, U, R, A1, A2)
24520 #define CRYPTO3(L, U, R, A1, A2, A3)
24521 #include "crypto.def"
24522 #undef CRYPTO1
24523 #undef CRYPTO2
24524 #undef CRYPTO3
24527 static const struct builtin_description bdesc_3arg[] =
24529 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24530 #define CRYPTO1(L, U, R, A)
24531 #define CRYPTO2(L, U, R, A1, A2)
24532 #include "crypto.def"
24533 #undef CRYPTO1
24534 #undef CRYPTO2
24535 #undef CRYPTO3
24537 #undef CRYPTO_BUILTIN
24539 /* Set up all the iWMMXt builtins. This is not called if
24540 TARGET_IWMMXT is zero. */
24542 static void
24543 arm_init_iwmmxt_builtins (void)
24545 const struct builtin_description * d;
24546 size_t i;
24548 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24549 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24550 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24552 tree v8qi_ftype_v8qi_v8qi_int
24553 = build_function_type_list (V8QI_type_node,
24554 V8QI_type_node, V8QI_type_node,
24555 integer_type_node, NULL_TREE);
24556 tree v4hi_ftype_v4hi_int
24557 = build_function_type_list (V4HI_type_node,
24558 V4HI_type_node, integer_type_node, NULL_TREE);
24559 tree v2si_ftype_v2si_int
24560 = build_function_type_list (V2SI_type_node,
24561 V2SI_type_node, integer_type_node, NULL_TREE);
24562 tree v2si_ftype_di_di
24563 = build_function_type_list (V2SI_type_node,
24564 long_long_integer_type_node,
24565 long_long_integer_type_node,
24566 NULL_TREE);
24567 tree di_ftype_di_int
24568 = build_function_type_list (long_long_integer_type_node,
24569 long_long_integer_type_node,
24570 integer_type_node, NULL_TREE);
24571 tree di_ftype_di_int_int
24572 = build_function_type_list (long_long_integer_type_node,
24573 long_long_integer_type_node,
24574 integer_type_node,
24575 integer_type_node, NULL_TREE);
24576 tree int_ftype_v8qi
24577 = build_function_type_list (integer_type_node,
24578 V8QI_type_node, NULL_TREE);
24579 tree int_ftype_v4hi
24580 = build_function_type_list (integer_type_node,
24581 V4HI_type_node, NULL_TREE);
24582 tree int_ftype_v2si
24583 = build_function_type_list (integer_type_node,
24584 V2SI_type_node, NULL_TREE);
24585 tree int_ftype_v8qi_int
24586 = build_function_type_list (integer_type_node,
24587 V8QI_type_node, integer_type_node, NULL_TREE);
24588 tree int_ftype_v4hi_int
24589 = build_function_type_list (integer_type_node,
24590 V4HI_type_node, integer_type_node, NULL_TREE);
24591 tree int_ftype_v2si_int
24592 = build_function_type_list (integer_type_node,
24593 V2SI_type_node, integer_type_node, NULL_TREE);
24594 tree v8qi_ftype_v8qi_int_int
24595 = build_function_type_list (V8QI_type_node,
24596 V8QI_type_node, integer_type_node,
24597 integer_type_node, NULL_TREE);
24598 tree v4hi_ftype_v4hi_int_int
24599 = build_function_type_list (V4HI_type_node,
24600 V4HI_type_node, integer_type_node,
24601 integer_type_node, NULL_TREE);
24602 tree v2si_ftype_v2si_int_int
24603 = build_function_type_list (V2SI_type_node,
24604 V2SI_type_node, integer_type_node,
24605 integer_type_node, NULL_TREE);
24606 /* Miscellaneous. */
24607 tree v8qi_ftype_v4hi_v4hi
24608 = build_function_type_list (V8QI_type_node,
24609 V4HI_type_node, V4HI_type_node, NULL_TREE);
24610 tree v4hi_ftype_v2si_v2si
24611 = build_function_type_list (V4HI_type_node,
24612 V2SI_type_node, V2SI_type_node, NULL_TREE);
24613 tree v8qi_ftype_v4hi_v8qi
24614 = build_function_type_list (V8QI_type_node,
24615 V4HI_type_node, V8QI_type_node, NULL_TREE);
24616 tree v2si_ftype_v4hi_v4hi
24617 = build_function_type_list (V2SI_type_node,
24618 V4HI_type_node, V4HI_type_node, NULL_TREE);
24619 tree v2si_ftype_v8qi_v8qi
24620 = build_function_type_list (V2SI_type_node,
24621 V8QI_type_node, V8QI_type_node, NULL_TREE);
24622 tree v4hi_ftype_v4hi_di
24623 = build_function_type_list (V4HI_type_node,
24624 V4HI_type_node, long_long_integer_type_node,
24625 NULL_TREE);
24626 tree v2si_ftype_v2si_di
24627 = build_function_type_list (V2SI_type_node,
24628 V2SI_type_node, long_long_integer_type_node,
24629 NULL_TREE);
24630 tree di_ftype_void
24631 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24632 tree int_ftype_void
24633 = build_function_type_list (integer_type_node, NULL_TREE);
24634 tree di_ftype_v8qi
24635 = build_function_type_list (long_long_integer_type_node,
24636 V8QI_type_node, NULL_TREE);
24637 tree di_ftype_v4hi
24638 = build_function_type_list (long_long_integer_type_node,
24639 V4HI_type_node, NULL_TREE);
24640 tree di_ftype_v2si
24641 = build_function_type_list (long_long_integer_type_node,
24642 V2SI_type_node, NULL_TREE);
24643 tree v2si_ftype_v4hi
24644 = build_function_type_list (V2SI_type_node,
24645 V4HI_type_node, NULL_TREE);
24646 tree v4hi_ftype_v8qi
24647 = build_function_type_list (V4HI_type_node,
24648 V8QI_type_node, NULL_TREE);
24649 tree v8qi_ftype_v8qi
24650 = build_function_type_list (V8QI_type_node,
24651 V8QI_type_node, NULL_TREE);
24652 tree v4hi_ftype_v4hi
24653 = build_function_type_list (V4HI_type_node,
24654 V4HI_type_node, NULL_TREE);
24655 tree v2si_ftype_v2si
24656 = build_function_type_list (V2SI_type_node,
24657 V2SI_type_node, NULL_TREE);
24659 tree di_ftype_di_v4hi_v4hi
24660 = build_function_type_list (long_long_unsigned_type_node,
24661 long_long_unsigned_type_node,
24662 V4HI_type_node, V4HI_type_node,
24663 NULL_TREE);
24665 tree di_ftype_v4hi_v4hi
24666 = build_function_type_list (long_long_unsigned_type_node,
24667 V4HI_type_node,V4HI_type_node,
24668 NULL_TREE);
24670 tree v2si_ftype_v2si_v4hi_v4hi
24671 = build_function_type_list (V2SI_type_node,
24672 V2SI_type_node, V4HI_type_node,
24673 V4HI_type_node, NULL_TREE);
24675 tree v2si_ftype_v2si_v8qi_v8qi
24676 = build_function_type_list (V2SI_type_node,
24677 V2SI_type_node, V8QI_type_node,
24678 V8QI_type_node, NULL_TREE);
24680 tree di_ftype_di_v2si_v2si
24681 = build_function_type_list (long_long_unsigned_type_node,
24682 long_long_unsigned_type_node,
24683 V2SI_type_node, V2SI_type_node,
24684 NULL_TREE);
24686 tree di_ftype_di_di_int
24687 = build_function_type_list (long_long_unsigned_type_node,
24688 long_long_unsigned_type_node,
24689 long_long_unsigned_type_node,
24690 integer_type_node, NULL_TREE);
24692 tree void_ftype_int
24693 = build_function_type_list (void_type_node,
24694 integer_type_node, NULL_TREE);
24696 tree v8qi_ftype_char
24697 = build_function_type_list (V8QI_type_node,
24698 signed_char_type_node, NULL_TREE);
24700 tree v4hi_ftype_short
24701 = build_function_type_list (V4HI_type_node,
24702 short_integer_type_node, NULL_TREE);
24704 tree v2si_ftype_int
24705 = build_function_type_list (V2SI_type_node,
24706 integer_type_node, NULL_TREE);
24708 /* Normal vector binops. */
24709 tree v8qi_ftype_v8qi_v8qi
24710 = build_function_type_list (V8QI_type_node,
24711 V8QI_type_node, V8QI_type_node, NULL_TREE);
24712 tree v4hi_ftype_v4hi_v4hi
24713 = build_function_type_list (V4HI_type_node,
24714 V4HI_type_node,V4HI_type_node, NULL_TREE);
24715 tree v2si_ftype_v2si_v2si
24716 = build_function_type_list (V2SI_type_node,
24717 V2SI_type_node, V2SI_type_node, NULL_TREE);
24718 tree di_ftype_di_di
24719 = build_function_type_list (long_long_unsigned_type_node,
24720 long_long_unsigned_type_node,
24721 long_long_unsigned_type_node,
24722 NULL_TREE);
24724 /* Add all builtins that are more or less simple operations on two
24725 operands. */
24726 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24728 /* Use one of the operands; the target can have a different mode for
24729 mask-generating compares. */
24730 machine_mode mode;
24731 tree type;
24733 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24734 continue;
24736 mode = insn_data[d->icode].operand[1].mode;
24738 switch (mode)
24740 case V8QImode:
24741 type = v8qi_ftype_v8qi_v8qi;
24742 break;
24743 case V4HImode:
24744 type = v4hi_ftype_v4hi_v4hi;
24745 break;
24746 case V2SImode:
24747 type = v2si_ftype_v2si_v2si;
24748 break;
24749 case DImode:
24750 type = di_ftype_di_di;
24751 break;
24753 default:
24754 gcc_unreachable ();
24757 def_mbuiltin (d->mask, d->name, type, d->code);
24760   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
24761 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24762 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24763 ARM_BUILTIN_ ## CODE)
24765 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24766 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24767 ARM_BUILTIN_ ## CODE)
24769 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24770 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24771 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24772 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24773 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24774 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24775 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24776 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24777 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24779 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24780 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24781 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24782 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24783 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24784 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24786 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24787 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24788 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24789 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24790 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24791 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24793 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24794 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24795 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24796 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24797 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24798 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24800 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24801 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24802 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24803 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24804 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24805 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24807 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24809 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24810 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24811 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24812 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24813 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24814 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24815 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24816 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24817 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24818 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24820 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24821 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24822 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24823 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24824 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24825 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24826 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24827 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24828 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24830 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24831 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24832 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24834 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24835 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24836 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24838 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24839 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24841 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24842 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24843 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24844 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24845 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24846 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24848 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24849 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24850 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24851 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24852 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24853 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24854 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24855 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24856 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24857 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24858 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24859 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24861 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24862 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24863 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24864 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24866 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24867 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24868 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24869 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24870 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24871 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24872 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24874 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24875 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24876 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24878 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24879 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24880 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24881 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24883 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24884 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24885 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24886 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24888 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24889 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24890 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24891 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24893 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24894 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24895 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24896 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24898 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24899 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24900 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24901 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24903 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24904 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24905 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24906 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24908 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24910 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24911 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24912 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24914 #undef iwmmx_mbuiltin
24915 #undef iwmmx2_mbuiltin
24918 static void
24919 arm_init_fp16_builtins (void)
24921 tree fp16_type = make_node (REAL_TYPE);
24922 TYPE_PRECISION (fp16_type) = 16;
24923 layout_type (fp16_type);
24924 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
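/* Once the "__fp16" type is registered, user code can declare
   half-precision scalars directly, e.g. (a sketch)

     __fp16 h = 1.0f;

   whenever arm_fp16_format is set (see arm_init_builtins below).  */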
24927 static void
24928 arm_init_crc32_builtins ()
24930 tree si_ftype_si_qi
24931 = build_function_type_list (unsigned_intSI_type_node,
24932 unsigned_intSI_type_node,
24933 unsigned_intQI_type_node, NULL_TREE);
24934 tree si_ftype_si_hi
24935 = build_function_type_list (unsigned_intSI_type_node,
24936 unsigned_intSI_type_node,
24937 unsigned_intHI_type_node, NULL_TREE);
24938 tree si_ftype_si_si
24939 = build_function_type_list (unsigned_intSI_type_node,
24940 unsigned_intSI_type_node,
24941 unsigned_intSI_type_node, NULL_TREE);
24943 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24944 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24945 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24946 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24947 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24948 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24949 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24950 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24951 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24952 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24953 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24954 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24955 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24956 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24957 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24958 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24959 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24960 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24963 static void
24964 arm_init_builtins (void)
24966 if (TARGET_REALLY_IWMMXT)
24967 arm_init_iwmmxt_builtins ();
24969 if (TARGET_NEON)
24970 arm_init_neon_builtins ();
24972 if (arm_fp16_format)
24973 arm_init_fp16_builtins ();
24975 if (TARGET_CRC32)
24976 arm_init_crc32_builtins ();
24978 if (TARGET_VFP && TARGET_HARD_FLOAT)
24980 tree ftype_set_fpscr
24981 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24982 tree ftype_get_fpscr
24983 = build_function_type_list (unsigned_type_node, NULL);
24985 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24986 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24987 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24988 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24989 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24990 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
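/* A hypothetical use of the two FPSCR builtins registered above
   (the bit manipulated is purely illustrative):

     unsigned int fpscr = __builtin_arm_ldfscr ();
     __builtin_arm_stfscr (fpscr | 1);

   reads the floating-point status and control register and writes a
   modified value back.  */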
24994 /* Return the ARM builtin for CODE. */
24996 static tree
24997 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24999 if (code >= ARM_BUILTIN_MAX)
25000 return error_mark_node;
25002 return arm_builtin_decls[code];
25005 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25007 static const char *
25008 arm_invalid_parameter_type (const_tree t)
25010 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25011 return N_("function parameters cannot have __fp16 type");
25012 return NULL;
25015 /* Implement TARGET_INVALID_RETURN_TYPE.  */
25017 static const char *
25018 arm_invalid_return_type (const_tree t)
25020 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25021 return N_("functions cannot return __fp16 type");
25022 return NULL;
25025 /* Implement TARGET_PROMOTED_TYPE. */
25027 static tree
25028 arm_promoted_type (const_tree t)
25030 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25031 return float_type_node;
25032 return NULL_TREE;
25035 /* Implement TARGET_CONVERT_TO_TYPE.
25036 Specifically, this hook implements the peculiarity of the ARM
25037    half-precision floating-point C semantics that requires conversions
25038    between __fp16 and double to go through an intermediate conversion to float.  */
25040 static tree
25041 arm_convert_to_type (tree type, tree expr)
25043 tree fromtype = TREE_TYPE (expr);
25044 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25045 return NULL_TREE;
25046 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25047 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25048 return convert (type, convert (float_type_node, expr));
25049 return NULL_TREE;
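/* Under this rule, a conversion such as (double) h for an __fp16 value h
   is rewritten as (double) (float) h, and similarly in the other
   direction; conversions that already involve float are left to the
   default handling.  */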
25052 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25053 This simply adds HFmode as a supported mode; even though we don't
25054 implement arithmetic on this type directly, it's supported by
25055 optabs conversions, much the way the double-word arithmetic is
25056 special-cased in the default hook. */
25058 static bool
25059 arm_scalar_mode_supported_p (machine_mode mode)
25061 if (mode == HFmode)
25062 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25063 else if (ALL_FIXED_POINT_MODE_P (mode))
25064 return true;
25065 else
25066 return default_scalar_mode_supported_p (mode);
25069 /* Errors in the source file can cause expand_expr to return const0_rtx
25070 where we expect a vector. To avoid crashing, use one of the vector
25071 clear instructions. */
25073 static rtx
25074 safe_vector_operand (rtx x, machine_mode mode)
25076 if (x != const0_rtx)
25077 return x;
25078 x = gen_reg_rtx (mode);
25080 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25081 : gen_rtx_SUBREG (DImode, x, 0)));
25082 return x;
25085 /* Function to expand ternary builtins. */
25086 static rtx
25087 arm_expand_ternop_builtin (enum insn_code icode,
25088 tree exp, rtx target)
25090 rtx pat;
25091 tree arg0 = CALL_EXPR_ARG (exp, 0);
25092 tree arg1 = CALL_EXPR_ARG (exp, 1);
25093 tree arg2 = CALL_EXPR_ARG (exp, 2);
25095 rtx op0 = expand_normal (arg0);
25096 rtx op1 = expand_normal (arg1);
25097 rtx op2 = expand_normal (arg2);
25098 rtx op3 = NULL_RTX;
25100 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25101 lane operand depending on endianness. */
25102 bool builtin_sha1cpm_p = false;
25104 if (insn_data[icode].n_operands == 5)
25106 gcc_assert (icode == CODE_FOR_crypto_sha1c
25107 || icode == CODE_FOR_crypto_sha1p
25108 || icode == CODE_FOR_crypto_sha1m);
25109 builtin_sha1cpm_p = true;
25111 machine_mode tmode = insn_data[icode].operand[0].mode;
25112 machine_mode mode0 = insn_data[icode].operand[1].mode;
25113 machine_mode mode1 = insn_data[icode].operand[2].mode;
25114 machine_mode mode2 = insn_data[icode].operand[3].mode;
25117 if (VECTOR_MODE_P (mode0))
25118 op0 = safe_vector_operand (op0, mode0);
25119 if (VECTOR_MODE_P (mode1))
25120 op1 = safe_vector_operand (op1, mode1);
25121 if (VECTOR_MODE_P (mode2))
25122 op2 = safe_vector_operand (op2, mode2);
25124 if (! target
25125 || GET_MODE (target) != tmode
25126 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25127 target = gen_reg_rtx (tmode);
25129 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25130 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25131 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25133 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25134 op0 = copy_to_mode_reg (mode0, op0);
25135 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25136 op1 = copy_to_mode_reg (mode1, op1);
25137 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25138 op2 = copy_to_mode_reg (mode2, op2);
25139 if (builtin_sha1cpm_p)
25140 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25142 if (builtin_sha1cpm_p)
25143 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25144 else
25145 pat = GEN_FCN (icode) (target, op0, op1, op2);
25146 if (! pat)
25147 return 0;
25148 emit_insn (pat);
25149 return target;
25152 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25154 static rtx
25155 arm_expand_binop_builtin (enum insn_code icode,
25156 tree exp, rtx target)
25158 rtx pat;
25159 tree arg0 = CALL_EXPR_ARG (exp, 0);
25160 tree arg1 = CALL_EXPR_ARG (exp, 1);
25161 rtx op0 = expand_normal (arg0);
25162 rtx op1 = expand_normal (arg1);
25163 machine_mode tmode = insn_data[icode].operand[0].mode;
25164 machine_mode mode0 = insn_data[icode].operand[1].mode;
25165 machine_mode mode1 = insn_data[icode].operand[2].mode;
25167 if (VECTOR_MODE_P (mode0))
25168 op0 = safe_vector_operand (op0, mode0);
25169 if (VECTOR_MODE_P (mode1))
25170 op1 = safe_vector_operand (op1, mode1);
25172 if (! target
25173 || GET_MODE (target) != tmode
25174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25175 target = gen_reg_rtx (tmode);
25177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25180 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25181 op0 = copy_to_mode_reg (mode0, op0);
25182 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25183 op1 = copy_to_mode_reg (mode1, op1);
25185 pat = GEN_FCN (icode) (target, op0, op1);
25186 if (! pat)
25187 return 0;
25188 emit_insn (pat);
25189 return target;
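/* For illustration: the two-operand iWMMXt builtins dispatched through this
   routine are normally reached via mmintrin.h, e.g. (a sketch, assuming the
   usual mapping of _mm_add_pi8 onto __builtin_arm_waddb):

	#include <mmintrin.h>
	__m64 add8 (__m64 a, __m64 b) { return _mm_add_pi8 (a, b); }

   Each operand is forced into a register satisfying the insn's predicate
   before the single pattern is emitted.  */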
25192 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25194 static rtx
25195 arm_expand_unop_builtin (enum insn_code icode,
25196 tree exp, rtx target, int do_load)
25198 rtx pat;
25199 tree arg0 = CALL_EXPR_ARG (exp, 0);
25200 rtx op0 = expand_normal (arg0);
25201 rtx op1 = NULL_RTX;
25202 machine_mode tmode = insn_data[icode].operand[0].mode;
25203 machine_mode mode0 = insn_data[icode].operand[1].mode;
25204 bool builtin_sha1h_p = false;
25206 if (insn_data[icode].n_operands == 3)
25208 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25209 builtin_sha1h_p = true;
25212 if (! target
25213 || GET_MODE (target) != tmode
25214 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25215 target = gen_reg_rtx (tmode);
25216 if (do_load)
25217 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25218 else
25220 if (VECTOR_MODE_P (mode0))
25221 op0 = safe_vector_operand (op0, mode0);
25223 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25224 op0 = copy_to_mode_reg (mode0, op0);
25226 if (builtin_sha1h_p)
25227 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25229 if (builtin_sha1h_p)
25230 pat = GEN_FCN (icode) (target, op0, op1);
25231 else
25232 pat = GEN_FCN (icode) (target, op0);
25233 if (! pat)
25234 return 0;
25235 emit_insn (pat);
25236 return target;
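/* For illustration: a typical unop builtin takes one register (or, with
   DO_LOAD, one pointer) operand.  The special-cased
   __builtin_arm_crypto_sha1h is reached via vsha1h_u32 in arm_neon.h,
   e.g. (a sketch):

	uint32_t e2 = vsha1h_u32 (e1);

   and gains a second, lane-selecting operand above so the same pattern works
   for big- and little-endian vector layouts.  */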
25239 typedef enum {
25240 NEON_ARG_COPY_TO_REG,
25241 NEON_ARG_CONSTANT,
25242 NEON_ARG_MEMORY,
25243 NEON_ARG_STOP
25244 } builtin_arg;
25246 #define NEON_MAX_BUILTIN_ARGS 5
25248 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25249 and return an expression for the accessed memory.
25251 The intrinsic function operates on a block of registers that has
25252 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25253 function references the memory at EXP of type TYPE and in mode
25254 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25255 available. */
25257 static tree
25258 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25259 machine_mode reg_mode,
25260 neon_builtin_type_mode type_mode)
25262 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25263 tree elem_type, upper_bound, array_type;
25265 /* Work out the size of the register block in bytes. */
25266 reg_size = GET_MODE_SIZE (reg_mode);
25268 /* Work out the size of each vector in bytes. */
25269 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25270 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25272 /* Work out how many vectors there are. */
25273 gcc_assert (reg_size % vector_size == 0);
25274 nvectors = reg_size / vector_size;
25276 /* Work out the type of each element. */
25277 gcc_assert (POINTER_TYPE_P (type));
25278 elem_type = TREE_TYPE (type);
25280 /* Work out how many elements are being loaded or stored.
25281 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25282 and memory elements; anything else implies a lane load or store. */
25283 if (mem_mode == reg_mode)
25284 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25285 else
25286 nelems = nvectors;
25288 /* Create a type that describes the full access. */
25289 upper_bound = build_int_cst (size_type_node, nelems - 1);
25290 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25292 /* Dereference EXP using that type. */
25293 return fold_build2 (MEM_REF, array_type, exp,
25294 build_int_cst (build_pointer_type (array_type), 0));
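/* A worked example of the sizes above: for a full-width load in the style of
   vld1q_s32 both REG_MODE and MEM_MODE are V4SImode, so reg_size = 16,
   vector_size = 16 (a quad register), nvectors = 1 and
   nelems = 16 * 1 / 4 = 4; the pointer is dereferenced as an int32_t[4]
   array.  For a lane load such as vld1q_lane_s32, MEM_MODE differs from
   REG_MODE and nelems = nvectors, so only a single element is described as
   accessed.  */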
25297 /* Expand a Neon builtin. */
25298 static rtx
25299 arm_expand_neon_args (rtx target, int icode, int have_retval,
25300 neon_builtin_type_mode type_mode,
25301 tree exp, int fcode, ...)
25303 va_list ap;
25304 rtx pat;
25305 tree arg[NEON_MAX_BUILTIN_ARGS];
25306 rtx op[NEON_MAX_BUILTIN_ARGS];
25307 tree arg_type;
25308 tree formals;
25309 machine_mode tmode = insn_data[icode].operand[0].mode;
25310 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25311 machine_mode other_mode;
25312 int argc = 0;
25313 int opno;
25315 if (have_retval
25316 && (!target
25317 || GET_MODE (target) != tmode
25318 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25319 target = gen_reg_rtx (tmode);
25321 va_start (ap, fcode);
25323 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25325 for (;;)
25327 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25329 if (thisarg == NEON_ARG_STOP)
25330 break;
25331 else
25333 opno = argc + have_retval;
25334 mode[argc] = insn_data[icode].operand[opno].mode;
25335 arg[argc] = CALL_EXPR_ARG (exp, argc);
25336 arg_type = TREE_VALUE (formals);
25337 if (thisarg == NEON_ARG_MEMORY)
25339 other_mode = insn_data[icode].operand[1 - opno].mode;
25340 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25341 mode[argc], other_mode,
25342 type_mode);
25345 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P

25346 is returned. */
25347 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25348 (thisarg == NEON_ARG_MEMORY
25349 ? EXPAND_MEMORY : EXPAND_NORMAL));
25351 switch (thisarg)
25353 case NEON_ARG_COPY_TO_REG:
25354 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25355 if (!(*insn_data[icode].operand[opno].predicate)
25356 (op[argc], mode[argc]))
25357 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25358 break;
25360 case NEON_ARG_CONSTANT:
25361 /* FIXME: This error message is somewhat unhelpful. */
25362 if (!(*insn_data[icode].operand[opno].predicate)
25363 (op[argc], mode[argc]))
25364 error ("argument must be a constant");
25365 break;
25367 case NEON_ARG_MEMORY:
25368 /* Check if expand failed. */
25369 if (op[argc] == const0_rtx)
25370 return 0;
25371 gcc_assert (MEM_P (op[argc]));
25372 PUT_MODE (op[argc], mode[argc]);
25373 /* ??? arm_neon.h uses the same built-in functions for signed
25374 and unsigned accesses, casting where necessary. This isn't
25375 alias safe. */
25376 set_mem_alias_set (op[argc], 0);
25377 if (!(*insn_data[icode].operand[opno].predicate)
25378 (op[argc], mode[argc]))
25379 op[argc] = (replace_equiv_address
25380 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25381 break;
25383 case NEON_ARG_STOP:
25384 gcc_unreachable ();
25387 argc++;
25388 formals = TREE_CHAIN (formals);
25392 va_end (ap);
25394 if (have_retval)
25395 switch (argc)
25397 case 1:
25398 pat = GEN_FCN (icode) (target, op[0]);
25399 break;
25401 case 2:
25402 pat = GEN_FCN (icode) (target, op[0], op[1]);
25403 break;
25405 case 3:
25406 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25407 break;
25409 case 4:
25410 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25411 break;
25413 case 5:
25414 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25415 break;
25417 default:
25418 gcc_unreachable ();
25420 else
25421 switch (argc)
25423 case 1:
25424 pat = GEN_FCN (icode) (op[0]);
25425 break;
25427 case 2:
25428 pat = GEN_FCN (icode) (op[0], op[1]);
25429 break;
25431 case 3:
25432 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25433 break;
25435 case 4:
25436 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25437 break;
25439 case 5:
25440 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25441 break;
25443 default:
25444 gcc_unreachable ();
25447 if (!pat)
25448 return 0;
25450 emit_insn (pat);
25452 return target;
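/* For example, a plain binary operation is expanded (see the NEON_BINOP case
   below) with a call of the form

	arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
			      NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
			      NEON_ARG_STOP);

   i.e. two register operands terminated by NEON_ARG_STOP; the loop above
   walks that list and gives each argument the treatment its descriptor
   requests before the pattern is generated.  */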
25455 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25456 constants defined per-instruction or per instruction-variant. Instead, the
25457 required info is looked up in the table neon_builtin_data. */
25458 static rtx
25459 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25461 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25462 neon_itype itype = d->itype;
25463 enum insn_code icode = d->code;
25464 neon_builtin_type_mode type_mode = d->mode;
25466 switch (itype)
25468 case NEON_UNOP:
25469 case NEON_CONVERT:
25470 case NEON_DUPLANE:
25471 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25472 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25474 case NEON_BINOP:
25475 case NEON_LOGICBINOP:
25476 case NEON_SCALARMUL:
25477 case NEON_SCALARMULL:
25478 case NEON_SCALARMULH:
25479 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25480 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25482 case NEON_TERNOP:
25483 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25484 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25485 NEON_ARG_STOP);
25487 case NEON_GETLANE:
25488 case NEON_FIXCONV:
25489 case NEON_SHIFTIMM:
25490 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25491 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25492 NEON_ARG_STOP);
25494 case NEON_CREATE:
25495 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25496 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25498 case NEON_DUP:
25499 case NEON_RINT:
25500 case NEON_SPLIT:
25501 case NEON_FLOAT_WIDEN:
25502 case NEON_FLOAT_NARROW:
25503 case NEON_BSWAP:
25504 case NEON_REINTERP:
25505 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25506 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25508 case NEON_COPYSIGNF:
25509 case NEON_COMBINE:
25510 case NEON_VTBL:
25511 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25512 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25514 case NEON_LANEMUL:
25515 case NEON_LANEMULL:
25516 case NEON_LANEMULH:
25517 case NEON_SETLANE:
25518 case NEON_SHIFTINSERT:
25519 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25520 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25521 NEON_ARG_STOP);
25523 case NEON_LANEMAC:
25524 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25525 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25526 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25528 case NEON_SHIFTACC:
25529 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25530 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25531 NEON_ARG_STOP);
25533 case NEON_SCALARMAC:
25534 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25535 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25536 NEON_ARG_STOP);
25538 case NEON_SELECT:
25539 case NEON_VTBX:
25540 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25541 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25542 NEON_ARG_STOP);
25544 case NEON_LOAD1:
25545 case NEON_LOADSTRUCT:
25546 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25547 NEON_ARG_MEMORY, NEON_ARG_STOP);
25549 case NEON_LOAD1LANE:
25550 case NEON_LOADSTRUCTLANE:
25551 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25552 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25553 NEON_ARG_STOP);
25555 case NEON_STORE1:
25556 case NEON_STORESTRUCT:
25557 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25558 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25560 case NEON_STORE1LANE:
25561 case NEON_STORESTRUCTLANE:
25562 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25563 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25564 NEON_ARG_STOP);
25567 gcc_unreachable ();
25570 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25571 void
25572 neon_reinterpret (rtx dest, rtx src)
25574 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
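/* For example, an intrinsic such as vreinterpretq_s32_f32 only changes the
   view taken of a register's bits, so it reduces to a plain move of the
   unmodified contents; gen_lowpart is safe here because the source and
   destination modes always have the same size.  */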
25577 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25578 not to early-clobber SRC registers in the process.
25580 We assume that the operands described by SRC and DEST represent a
25581 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25582 number of components into which the copy has been decomposed. */
25583 void
25584 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25586 unsigned int i;
25588 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25589 || REGNO (operands[0]) < REGNO (operands[1]))
25591 for (i = 0; i < count; i++)
25593 operands[2 * i] = dest[i];
25594 operands[2 * i + 1] = src[i];
25597 else
25599 for (i = 0; i < count; i++)
25601 operands[2 * i] = dest[count - i - 1];
25602 operands[2 * i + 1] = src[count - i - 1];
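/* A worked example: copying the two-register block {d1, d2} into {d2, d3}
   overlaps and has REGNO (dest) > REGNO (src), so the copies are ordered in
   reverse (d2 -> d3 first, then d1 -> d2); emitting them forwards would
   overwrite d2 before its old value had been read.  */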
25607 /* Split operands into moves from op[1] + op[2] into op[0]. */
25609 void
25610 neon_split_vcombine (rtx operands[3])
25612 unsigned int dest = REGNO (operands[0]);
25613 unsigned int src1 = REGNO (operands[1]);
25614 unsigned int src2 = REGNO (operands[2]);
25615 machine_mode halfmode = GET_MODE (operands[1]);
25616 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25617 rtx destlo, desthi;
25619 if (src1 == dest && src2 == dest + halfregs)
25621 /* No-op move. Can't split to nothing; emit something. */
25622 emit_note (NOTE_INSN_DELETED);
25623 return;
25626 /* Preserve register attributes for variable tracking. */
25627 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25628 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25629 GET_MODE_SIZE (halfmode));
25631 /* Special case of reversed high/low parts. Use VSWP. */
25632 if (src2 == dest && src1 == dest + halfregs)
25634 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25635 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25636 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25637 return;
25640 if (!reg_overlap_mentioned_p (operands[2], destlo))
25642 /* Try to avoid unnecessary moves if part of the result
25643 is in the right place already. */
25644 if (src1 != dest)
25645 emit_move_insn (destlo, operands[1]);
25646 if (src2 != dest + halfregs)
25647 emit_move_insn (desthi, operands[2]);
25649 else
25651 if (src2 != dest + halfregs)
25652 emit_move_insn (desthi, operands[2]);
25653 if (src1 != dest)
25654 emit_move_insn (destlo, operands[1]);
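/* Examples of the cases above: combining d0 and d1 into q0 (the pair d0/d1)
   is already in place and emits only a deleted-insn note; combining d1 and
   d0 into q0 is the reversed case handled by VSWP; anything else falls
   through to one or two ordinary moves, ordered so that the second move's
   source is not clobbered by the first.  */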
25658 /* Expand an expression EXP that calls a built-in function,
25659 with result going to TARGET if that's convenient
25660 (and in mode MODE if that's convenient).
25661 SUBTARGET may be used as the target for computing one of EXP's operands.
25662 IGNORE is nonzero if the value is to be ignored. */
25664 static rtx
25665 arm_expand_builtin (tree exp,
25666 rtx target,
25667 rtx subtarget ATTRIBUTE_UNUSED,
25668 machine_mode mode ATTRIBUTE_UNUSED,
25669 int ignore ATTRIBUTE_UNUSED)
25671 const struct builtin_description * d;
25672 enum insn_code icode;
25673 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25674 tree arg0;
25675 tree arg1;
25676 tree arg2;
25677 rtx op0;
25678 rtx op1;
25679 rtx op2;
25680 rtx pat;
25681 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25682 size_t i;
25683 machine_mode tmode;
25684 machine_mode mode0;
25685 machine_mode mode1;
25686 machine_mode mode2;
25687 int opint;
25688 int selector;
25689 int mask;
25690 int imm;
25692 if (fcode >= ARM_BUILTIN_NEON_BASE)
25693 return arm_expand_neon_builtin (fcode, exp, target);
25695 switch (fcode)
25697 case ARM_BUILTIN_GET_FPSCR:
25698 case ARM_BUILTIN_SET_FPSCR:
25699 if (fcode == ARM_BUILTIN_GET_FPSCR)
25701 icode = CODE_FOR_get_fpscr;
25702 target = gen_reg_rtx (SImode);
25703 pat = GEN_FCN (icode) (target);
25705 else
25707 target = NULL_RTX;
25708 icode = CODE_FOR_set_fpscr;
25709 arg0 = CALL_EXPR_ARG (exp, 0);
25710 op0 = expand_normal (arg0);
25711 pat = GEN_FCN (icode) (op0);
25713 emit_insn (pat);
25714 return target;
25716 case ARM_BUILTIN_TEXTRMSB:
25717 case ARM_BUILTIN_TEXTRMUB:
25718 case ARM_BUILTIN_TEXTRMSH:
25719 case ARM_BUILTIN_TEXTRMUH:
25720 case ARM_BUILTIN_TEXTRMSW:
25721 case ARM_BUILTIN_TEXTRMUW:
25722 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25723 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25724 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25725 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25726 : CODE_FOR_iwmmxt_textrmw);
25728 arg0 = CALL_EXPR_ARG (exp, 0);
25729 arg1 = CALL_EXPR_ARG (exp, 1);
25730 op0 = expand_normal (arg0);
25731 op1 = expand_normal (arg1);
25732 tmode = insn_data[icode].operand[0].mode;
25733 mode0 = insn_data[icode].operand[1].mode;
25734 mode1 = insn_data[icode].operand[2].mode;
25736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25737 op0 = copy_to_mode_reg (mode0, op0);
25738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25740 /* @@@ better error message */
25741 error ("selector must be an immediate");
25742 return gen_reg_rtx (tmode);
25745 opint = INTVAL (op1);
25746 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25748 if (opint > 7 || opint < 0)
25749 error ("the range of selector should be in 0 to 7");
25751 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25753 if (opint > 3 || opint < 0)
25754 error ("the range of selector should be in 0 to 3");
25756 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25758 if (opint > 1 || opint < 0)
25759 error ("the range of selector should be in 0 to 1");
25762 if (target == 0
25763 || GET_MODE (target) != tmode
25764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25765 target = gen_reg_rtx (tmode);
25766 pat = GEN_FCN (icode) (target, op0, op1);
25767 if (! pat)
25768 return 0;
25769 emit_insn (pat);
25770 return target;
25772 case ARM_BUILTIN_WALIGNI:
25773 /* If op2 is an immediate, call waligni; otherwise call walignr. */
25774 arg0 = CALL_EXPR_ARG (exp, 0);
25775 arg1 = CALL_EXPR_ARG (exp, 1);
25776 arg2 = CALL_EXPR_ARG (exp, 2);
25777 op0 = expand_normal (arg0);
25778 op1 = expand_normal (arg1);
25779 op2 = expand_normal (arg2);
25780 if (CONST_INT_P (op2))
25782 icode = CODE_FOR_iwmmxt_waligni;
25783 tmode = insn_data[icode].operand[0].mode;
25784 mode0 = insn_data[icode].operand[1].mode;
25785 mode1 = insn_data[icode].operand[2].mode;
25786 mode2 = insn_data[icode].operand[3].mode;
25787 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25788 op0 = copy_to_mode_reg (mode0, op0);
25789 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25790 op1 = copy_to_mode_reg (mode1, op1);
25791 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25792 selector = INTVAL (op2);
25793 if (selector > 7 || selector < 0)
25794 error ("the range of selector should be in 0 to 7");
25796 else
25798 icode = CODE_FOR_iwmmxt_walignr;
25799 tmode = insn_data[icode].operand[0].mode;
25800 mode0 = insn_data[icode].operand[1].mode;
25801 mode1 = insn_data[icode].operand[2].mode;
25802 mode2 = insn_data[icode].operand[3].mode;
25803 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25804 op0 = copy_to_mode_reg (mode0, op0);
25805 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25806 op1 = copy_to_mode_reg (mode1, op1);
25807 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25808 op2 = copy_to_mode_reg (mode2, op2);
25810 if (target == 0
25811 || GET_MODE (target) != tmode
25812 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25813 target = gen_reg_rtx (tmode);
25814 pat = GEN_FCN (icode) (target, op0, op1, op2);
25815 if (!pat)
25816 return 0;
25817 emit_insn (pat);
25818 return target;
25820 case ARM_BUILTIN_TINSRB:
25821 case ARM_BUILTIN_TINSRH:
25822 case ARM_BUILTIN_TINSRW:
25823 case ARM_BUILTIN_WMERGE:
25824 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25825 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25826 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25827 : CODE_FOR_iwmmxt_tinsrw);
25828 arg0 = CALL_EXPR_ARG (exp, 0);
25829 arg1 = CALL_EXPR_ARG (exp, 1);
25830 arg2 = CALL_EXPR_ARG (exp, 2);
25831 op0 = expand_normal (arg0);
25832 op1 = expand_normal (arg1);
25833 op2 = expand_normal (arg2);
25834 tmode = insn_data[icode].operand[0].mode;
25835 mode0 = insn_data[icode].operand[1].mode;
25836 mode1 = insn_data[icode].operand[2].mode;
25837 mode2 = insn_data[icode].operand[3].mode;
25839 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25840 op0 = copy_to_mode_reg (mode0, op0);
25841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25842 op1 = copy_to_mode_reg (mode1, op1);
25843 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25845 error ("selector must be an immediate");
25846 return const0_rtx;
25848 if (icode == CODE_FOR_iwmmxt_wmerge)
25850 selector = INTVAL (op2);
25851 if (selector > 7 || selector < 0)
25852 error ("the range of selector should be in 0 to 7");
25854 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25855 || (icode == CODE_FOR_iwmmxt_tinsrh)
25856 || (icode == CODE_FOR_iwmmxt_tinsrw))
25858 mask = 0x01;
25859 selector= INTVAL (op2);
25860 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25861 error ("the range of selector should be in 0 to 7");
25862 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25863 error ("the range of selector should be in 0 to 3");
25864 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25865 error ("the range of selector should be in 0 to 1");
25866 mask <<= selector;
25867 op2 = GEN_INT (mask);
25869 if (target == 0
25870 || GET_MODE (target) != tmode
25871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25872 target = gen_reg_rtx (tmode);
25873 pat = GEN_FCN (icode) (target, op0, op1, op2);
25874 if (! pat)
25875 return 0;
25876 emit_insn (pat);
25877 return target;
25879 case ARM_BUILTIN_SETWCGR0:
25880 case ARM_BUILTIN_SETWCGR1:
25881 case ARM_BUILTIN_SETWCGR2:
25882 case ARM_BUILTIN_SETWCGR3:
25883 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25884 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25885 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25886 : CODE_FOR_iwmmxt_setwcgr3);
25887 arg0 = CALL_EXPR_ARG (exp, 0);
25888 op0 = expand_normal (arg0);
25889 mode0 = insn_data[icode].operand[0].mode;
25890 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25891 op0 = copy_to_mode_reg (mode0, op0);
25892 pat = GEN_FCN (icode) (op0);
25893 if (!pat)
25894 return 0;
25895 emit_insn (pat);
25896 return 0;
25898 case ARM_BUILTIN_GETWCGR0:
25899 case ARM_BUILTIN_GETWCGR1:
25900 case ARM_BUILTIN_GETWCGR2:
25901 case ARM_BUILTIN_GETWCGR3:
25902 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25903 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25904 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25905 : CODE_FOR_iwmmxt_getwcgr3);
25906 tmode = insn_data[icode].operand[0].mode;
25907 if (target == 0
25908 || GET_MODE (target) != tmode
25909 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25910 target = gen_reg_rtx (tmode);
25911 pat = GEN_FCN (icode) (target);
25912 if (!pat)
25913 return 0;
25914 emit_insn (pat);
25915 return target;
25917 case ARM_BUILTIN_WSHUFH:
25918 icode = CODE_FOR_iwmmxt_wshufh;
25919 arg0 = CALL_EXPR_ARG (exp, 0);
25920 arg1 = CALL_EXPR_ARG (exp, 1);
25921 op0 = expand_normal (arg0);
25922 op1 = expand_normal (arg1);
25923 tmode = insn_data[icode].operand[0].mode;
25924 mode1 = insn_data[icode].operand[1].mode;
25925 mode2 = insn_data[icode].operand[2].mode;
25927 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25928 op0 = copy_to_mode_reg (mode1, op0);
25929 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25931 error ("mask must be an immediate");
25932 return const0_rtx;
25934 selector = INTVAL (op1);
25935 if (selector < 0 || selector > 255)
25936 error ("the range of mask should be in 0 to 255");
25937 if (target == 0
25938 || GET_MODE (target) != tmode
25939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25940 target = gen_reg_rtx (tmode);
25941 pat = GEN_FCN (icode) (target, op0, op1);
25942 if (! pat)
25943 return 0;
25944 emit_insn (pat);
25945 return target;
25947 case ARM_BUILTIN_WMADDS:
25948 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25949 case ARM_BUILTIN_WMADDSX:
25950 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25951 case ARM_BUILTIN_WMADDSN:
25952 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25953 case ARM_BUILTIN_WMADDU:
25954 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25955 case ARM_BUILTIN_WMADDUX:
25956 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25957 case ARM_BUILTIN_WMADDUN:
25958 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25959 case ARM_BUILTIN_WSADBZ:
25960 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25961 case ARM_BUILTIN_WSADHZ:
25962 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25964 /* Several three-argument builtins. */
25965 case ARM_BUILTIN_WMACS:
25966 case ARM_BUILTIN_WMACU:
25967 case ARM_BUILTIN_TMIA:
25968 case ARM_BUILTIN_TMIAPH:
25969 case ARM_BUILTIN_TMIATT:
25970 case ARM_BUILTIN_TMIATB:
25971 case ARM_BUILTIN_TMIABT:
25972 case ARM_BUILTIN_TMIABB:
25973 case ARM_BUILTIN_WQMIABB:
25974 case ARM_BUILTIN_WQMIABT:
25975 case ARM_BUILTIN_WQMIATB:
25976 case ARM_BUILTIN_WQMIATT:
25977 case ARM_BUILTIN_WQMIABBN:
25978 case ARM_BUILTIN_WQMIABTN:
25979 case ARM_BUILTIN_WQMIATBN:
25980 case ARM_BUILTIN_WQMIATTN:
25981 case ARM_BUILTIN_WMIABB:
25982 case ARM_BUILTIN_WMIABT:
25983 case ARM_BUILTIN_WMIATB:
25984 case ARM_BUILTIN_WMIATT:
25985 case ARM_BUILTIN_WMIABBN:
25986 case ARM_BUILTIN_WMIABTN:
25987 case ARM_BUILTIN_WMIATBN:
25988 case ARM_BUILTIN_WMIATTN:
25989 case ARM_BUILTIN_WMIAWBB:
25990 case ARM_BUILTIN_WMIAWBT:
25991 case ARM_BUILTIN_WMIAWTB:
25992 case ARM_BUILTIN_WMIAWTT:
25993 case ARM_BUILTIN_WMIAWBBN:
25994 case ARM_BUILTIN_WMIAWBTN:
25995 case ARM_BUILTIN_WMIAWTBN:
25996 case ARM_BUILTIN_WMIAWTTN:
25997 case ARM_BUILTIN_WSADB:
25998 case ARM_BUILTIN_WSADH:
25999 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26000 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26001 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26002 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26003 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26004 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26005 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26006 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26007 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26008 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26009 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26010 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26011 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26012 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26013 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26014 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26015 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26016 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26017 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26018 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26019 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26020 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26021 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26022 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26023 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26024 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26025 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26026 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26027 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26028 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26029 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26030 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26031 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26032 : CODE_FOR_iwmmxt_wsadh);
26033 arg0 = CALL_EXPR_ARG (exp, 0);
26034 arg1 = CALL_EXPR_ARG (exp, 1);
26035 arg2 = CALL_EXPR_ARG (exp, 2);
26036 op0 = expand_normal (arg0);
26037 op1 = expand_normal (arg1);
26038 op2 = expand_normal (arg2);
26039 tmode = insn_data[icode].operand[0].mode;
26040 mode0 = insn_data[icode].operand[1].mode;
26041 mode1 = insn_data[icode].operand[2].mode;
26042 mode2 = insn_data[icode].operand[3].mode;
26044 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26045 op0 = copy_to_mode_reg (mode0, op0);
26046 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26047 op1 = copy_to_mode_reg (mode1, op1);
26048 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26049 op2 = copy_to_mode_reg (mode2, op2);
26050 if (target == 0
26051 || GET_MODE (target) != tmode
26052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26053 target = gen_reg_rtx (tmode);
26054 pat = GEN_FCN (icode) (target, op0, op1, op2);
26055 if (! pat)
26056 return 0;
26057 emit_insn (pat);
26058 return target;
26060 case ARM_BUILTIN_WZERO:
26061 target = gen_reg_rtx (DImode);
26062 emit_insn (gen_iwmmxt_clrdi (target));
26063 return target;
26065 case ARM_BUILTIN_WSRLHI:
26066 case ARM_BUILTIN_WSRLWI:
26067 case ARM_BUILTIN_WSRLDI:
26068 case ARM_BUILTIN_WSLLHI:
26069 case ARM_BUILTIN_WSLLWI:
26070 case ARM_BUILTIN_WSLLDI:
26071 case ARM_BUILTIN_WSRAHI:
26072 case ARM_BUILTIN_WSRAWI:
26073 case ARM_BUILTIN_WSRADI:
26074 case ARM_BUILTIN_WRORHI:
26075 case ARM_BUILTIN_WRORWI:
26076 case ARM_BUILTIN_WRORDI:
26077 case ARM_BUILTIN_WSRLH:
26078 case ARM_BUILTIN_WSRLW:
26079 case ARM_BUILTIN_WSRLD:
26080 case ARM_BUILTIN_WSLLH:
26081 case ARM_BUILTIN_WSLLW:
26082 case ARM_BUILTIN_WSLLD:
26083 case ARM_BUILTIN_WSRAH:
26084 case ARM_BUILTIN_WSRAW:
26085 case ARM_BUILTIN_WSRAD:
26086 case ARM_BUILTIN_WRORH:
26087 case ARM_BUILTIN_WRORW:
26088 case ARM_BUILTIN_WRORD:
26089 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26090 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26091 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26092 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26093 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26094 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26095 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26096 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26097 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26098 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26099 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26100 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26101 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26102 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26103 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26104 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26105 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26106 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26107 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26108 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26109 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26110 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26111 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26112 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26113 : CODE_FOR_nothing);
26114 arg1 = CALL_EXPR_ARG (exp, 1);
26115 op1 = expand_normal (arg1);
26116 if (GET_MODE (op1) == VOIDmode)
26118 imm = INTVAL (op1);
26119 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26120 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26121 && (imm < 0 || imm > 32))
26123 if (fcode == ARM_BUILTIN_WRORHI)
26124 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26125 else if (fcode == ARM_BUILTIN_WRORWI)
26126 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26127 else if (fcode == ARM_BUILTIN_WRORH)
26128 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26129 else
26130 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26132 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26133 && (imm < 0 || imm > 64))
26135 if (fcode == ARM_BUILTIN_WRORDI)
26136 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26137 else
26138 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26140 else if (imm < 0)
26142 if (fcode == ARM_BUILTIN_WSRLHI)
26143 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26144 else if (fcode == ARM_BUILTIN_WSRLWI)
26145 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26146 else if (fcode == ARM_BUILTIN_WSRLDI)
26147 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26148 else if (fcode == ARM_BUILTIN_WSLLHI)
26149 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26150 else if (fcode == ARM_BUILTIN_WSLLWI)
26151 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26152 else if (fcode == ARM_BUILTIN_WSLLDI)
26153 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26154 else if (fcode == ARM_BUILTIN_WSRAHI)
26155 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26156 else if (fcode == ARM_BUILTIN_WSRAWI)
26157 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26158 else if (fcode == ARM_BUILTIN_WSRADI)
26159 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26160 else if (fcode == ARM_BUILTIN_WSRLH)
26161 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26162 else if (fcode == ARM_BUILTIN_WSRLW)
26163 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26164 else if (fcode == ARM_BUILTIN_WSRLD)
26165 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26166 else if (fcode == ARM_BUILTIN_WSLLH)
26167 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26168 else if (fcode == ARM_BUILTIN_WSLLW)
26169 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26170 else if (fcode == ARM_BUILTIN_WSLLD)
26171 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26172 else if (fcode == ARM_BUILTIN_WSRAH)
26173 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26174 else if (fcode == ARM_BUILTIN_WSRAW)
26175 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26176 else
26177 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26180 return arm_expand_binop_builtin (icode, exp, target);
26182 default:
26183 break;
26186 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26187 if (d->code == (const enum arm_builtins) fcode)
26188 return arm_expand_binop_builtin (d->icode, exp, target);
26190 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26191 if (d->code == (const enum arm_builtins) fcode)
26192 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26194 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26195 if (d->code == (const enum arm_builtins) fcode)
26196 return arm_expand_ternop_builtin (d->icode, exp, target);
26198 /* @@@ Should really do something sensible here. */
26199 return NULL_RTX;
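/* For example, the FPSCR pair handled first in the switch above has no table
   entry: __builtin_arm_get_fpscr () expands to a single read of the
   floating-point status and control register into a fresh SImode register,
   while __builtin_arm_set_fpscr (x) expands to the corresponding write and
   produces no result.  */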
26202 /* Return the number (counting from 0) of
26203 the least significant set bit in MASK. */
26205 inline static int
26206 number_of_first_bit_set (unsigned mask)
26208 return ctz_hwi (mask);
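/* E.g. number_of_first_bit_set (0x28) is 3: 0x28 is binary 101000, whose
   lowest set bit is bit 3.  */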
26211 /* Like emit_multi_reg_push, but allowing for a different set of
26212 registers to be described as saved. MASK is the set of registers
26213 to be saved; REAL_REGS is the set of registers to be described as
26214 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26216 static rtx_insn *
26217 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26219 unsigned long regno;
26220 rtx par[10], tmp, reg;
26221 rtx_insn *insn;
26222 int i, j;
26224 /* Build the parallel of the registers actually being stored. */
26225 for (i = 0; mask; ++i, mask &= mask - 1)
26227 regno = ctz_hwi (mask);
26228 reg = gen_rtx_REG (SImode, regno);
26230 if (i == 0)
26231 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26232 else
26233 tmp = gen_rtx_USE (VOIDmode, reg);
26235 par[i] = tmp;
26238 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26239 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26240 tmp = gen_frame_mem (BLKmode, tmp);
26241 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26242 par[0] = tmp;
26244 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26245 insn = emit_insn (tmp);
26247 /* Always build the stack adjustment note for unwind info. */
26248 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26249 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26250 par[0] = tmp;
26252 /* Build the parallel of the registers recorded as saved for unwind. */
26253 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26255 regno = ctz_hwi (real_regs);
26256 reg = gen_rtx_REG (SImode, regno);
26258 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26259 tmp = gen_frame_mem (SImode, tmp);
26260 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26261 RTX_FRAME_RELATED_P (tmp) = 1;
26262 par[j + 1] = tmp;
26265 if (j == 0)
26266 tmp = par[0];
26267 else
26269 RTX_FRAME_RELATED_P (par[0]) = 1;
26270 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26273 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26275 return insn;
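/* For example, MASK = (1 << 4) | (1 << 5) | (1 << LR_REGNUM) describes
   "push {r4, r5, lr}": the PARALLEL stores three registers through a
   PRE_MODIFY of the stack pointer by -12, and the REG_FRAME_RELATED_EXPR
   note records the stack adjustment plus the REAL_REGS saves at sp, sp + 4
   and sp + 8 for the unwinder.  */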
26278 /* Emit code to push or pop registers to or from the stack. F is the
26279 assembly file. MASK is the registers to pop. */
26280 static void
26281 thumb_pop (FILE *f, unsigned long mask)
26283 int regno;
26284 int lo_mask = mask & 0xFF;
26285 int pushed_words = 0;
26287 gcc_assert (mask);
26289 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26291 /* Special case. Do not generate a POP PC statement here, do it in
26292 thumb_exit (). */
26293 thumb_exit (f, -1);
26294 return;
26297 fprintf (f, "\tpop\t{");
26299 /* Look at the low registers first. */
26300 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26302 if (lo_mask & 1)
26304 asm_fprintf (f, "%r", regno);
26306 if ((lo_mask & ~1) != 0)
26307 fprintf (f, ", ");
26309 pushed_words++;
26313 if (mask & (1 << PC_REGNUM))
26315 /* Catch popping the PC. */
26316 if (TARGET_INTERWORK || TARGET_BACKTRACE
26317 || crtl->calls_eh_return)
26319 /* The PC is never popped directly; instead
26320 it is popped into r3 and then BX is used. */
26321 fprintf (f, "}\n");
26323 thumb_exit (f, -1);
26325 return;
26327 else
26329 if (mask & 0xFF)
26330 fprintf (f, ", ");
26332 asm_fprintf (f, "%r", PC_REGNUM);
26336 fprintf (f, "}\n");
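/* For example, MASK = 0xf0 | (1 << PC_REGNUM) normally prints
   "pop {r4, r5, r6, r7, pc}"; under TARGET_INTERWORK, TARGET_BACKTRACE or an
   eh_return the PC is not popped directly and thumb_exit is called to finish
   the return instead.  */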
26339 /* Generate code to return from a thumb function.
26340 If 'reg_containing_return_addr' is -1, then the return address is
26341 actually on the stack, at the stack pointer. */
26342 static void
26343 thumb_exit (FILE *f, int reg_containing_return_addr)
26345 unsigned regs_available_for_popping;
26346 unsigned regs_to_pop;
26347 int pops_needed;
26348 unsigned available;
26349 unsigned required;
26350 machine_mode mode;
26351 int size;
26352 int restore_a4 = FALSE;
26354 /* Compute the registers we need to pop. */
26355 regs_to_pop = 0;
26356 pops_needed = 0;
26358 if (reg_containing_return_addr == -1)
26360 regs_to_pop |= 1 << LR_REGNUM;
26361 ++pops_needed;
26364 if (TARGET_BACKTRACE)
26366 /* Restore the (ARM) frame pointer and stack pointer. */
26367 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26368 pops_needed += 2;
26371 /* If there is nothing to pop then just emit the BX instruction and
26372 return. */
26373 if (pops_needed == 0)
26375 if (crtl->calls_eh_return)
26376 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26378 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26379 return;
26381 /* Otherwise if we are not supporting interworking and we have not created
26382 a backtrace structure and the function was not entered in ARM mode then
26383 just pop the return address straight into the PC. */
26384 else if (!TARGET_INTERWORK
26385 && !TARGET_BACKTRACE
26386 && !is_called_in_ARM_mode (current_function_decl)
26387 && !crtl->calls_eh_return)
26389 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26390 return;
26393 /* Find out how many of the (return) argument registers we can corrupt. */
26394 regs_available_for_popping = 0;
26396 /* If returning via __builtin_eh_return, the bottom three registers
26397 all contain information needed for the return. */
26398 if (crtl->calls_eh_return)
26399 size = 12;
26400 else
26402 /* Deduce, if possible, the registers used from the function's
26403 return value. This is more reliable than examining
26404 df_regs_ever_live_p () because that will be set if the register is
26405 ever used in the function, not just if the register is used
26406 to hold a return value. */
26408 if (crtl->return_rtx != 0)
26409 mode = GET_MODE (crtl->return_rtx);
26410 else
26411 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26413 size = GET_MODE_SIZE (mode);
26415 if (size == 0)
26417 /* In a void function we can use any argument register.
26418 In a function that returns a structure on the stack
26419 we can use the second and third argument registers. */
26420 if (mode == VOIDmode)
26421 regs_available_for_popping =
26422 (1 << ARG_REGISTER (1))
26423 | (1 << ARG_REGISTER (2))
26424 | (1 << ARG_REGISTER (3));
26425 else
26426 regs_available_for_popping =
26427 (1 << ARG_REGISTER (2))
26428 | (1 << ARG_REGISTER (3));
26430 else if (size <= 4)
26431 regs_available_for_popping =
26432 (1 << ARG_REGISTER (2))
26433 | (1 << ARG_REGISTER (3));
26434 else if (size <= 8)
26435 regs_available_for_popping =
26436 (1 << ARG_REGISTER (3));
26439 /* Match registers to be popped with registers into which we pop them. */
26440 for (available = regs_available_for_popping,
26441 required = regs_to_pop;
26442 required != 0 && available != 0;
26443 available &= ~(available & - available),
26444 required &= ~(required & - required))
26445 -- pops_needed;
26447 /* If we have any popping registers left over, remove them. */
26448 if (available > 0)
26449 regs_available_for_popping &= ~available;
26451 /* Otherwise if we need another popping register we can use
26452 the fourth argument register. */
26453 else if (pops_needed)
26455 /* If we have not found any free argument registers and
26456 reg a4 contains the return address, we must move it. */
26457 if (regs_available_for_popping == 0
26458 && reg_containing_return_addr == LAST_ARG_REGNUM)
26460 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26461 reg_containing_return_addr = LR_REGNUM;
26463 else if (size > 12)
26465 /* Register a4 is being used to hold part of the return value,
26466 but we have dire need of a free, low register. */
26467 restore_a4 = TRUE;
26469 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26472 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26474 /* The fourth argument register is available. */
26475 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26477 --pops_needed;
26481 /* Pop as many registers as we can. */
26482 thumb_pop (f, regs_available_for_popping);
26484 /* Process the registers we popped. */
26485 if (reg_containing_return_addr == -1)
26487 /* The return address was popped into the lowest numbered register. */
26488 regs_to_pop &= ~(1 << LR_REGNUM);
26490 reg_containing_return_addr =
26491 number_of_first_bit_set (regs_available_for_popping);
26493 /* Remove this register from the mask of available registers, so that
26494 the return address will not be corrupted by further pops. */
26495 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26498 /* If we popped other registers then handle them here. */
26499 if (regs_available_for_popping)
26501 int frame_pointer;
26503 /* Work out which register currently contains the frame pointer. */
26504 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26506 /* Move it into the correct place. */
26507 asm_fprintf (f, "\tmov\t%r, %r\n",
26508 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26510 /* (Temporarily) remove it from the mask of popped registers. */
26511 regs_available_for_popping &= ~(1 << frame_pointer);
26512 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26514 if (regs_available_for_popping)
26516 int stack_pointer;
26518 /* We popped the stack pointer as well;
26519 find the register that contains it. */
26520 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26522 /* Move it into the stack register. */
26523 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26525 /* At this point we have popped all necessary registers, so
26526 do not worry about restoring regs_available_for_popping
26527 to its correct value:
26529 assert (pops_needed == 0)
26530 assert (regs_available_for_popping == (1 << frame_pointer))
26531 assert (regs_to_pop == (1 << STACK_POINTER)) */
26533 else
26535 /* Since we have just moved the popped value into the frame
26536 pointer, the popping register is available for reuse, and
26537 we know that we still have the stack pointer left to pop. */
26538 regs_available_for_popping |= (1 << frame_pointer);
26542 /* If we still have registers left on the stack, but we no longer have
26543 any registers into which we can pop them, then we must move the return
26544 address into the link register and make available the register that
26545 contained it. */
26546 if (regs_available_for_popping == 0 && pops_needed > 0)
26548 regs_available_for_popping |= 1 << reg_containing_return_addr;
26550 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26551 reg_containing_return_addr);
26553 reg_containing_return_addr = LR_REGNUM;
26556 /* If we have registers left on the stack then pop some more.
26557 We know that at most we will want to pop FP and SP. */
26558 if (pops_needed > 0)
26560 int popped_into;
26561 int move_to;
26563 thumb_pop (f, regs_available_for_popping);
26565 /* We have popped either FP or SP.
26566 Move whichever one it is into the correct register. */
26567 popped_into = number_of_first_bit_set (regs_available_for_popping);
26568 move_to = number_of_first_bit_set (regs_to_pop);
26570 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26572 regs_to_pop &= ~(1 << move_to);
26574 --pops_needed;
26577 /* If we still have not popped everything then we must have only
26578 had one register available to us and we are now popping the SP. */
26579 if (pops_needed > 0)
26581 int popped_into;
26583 thumb_pop (f, regs_available_for_popping);
26585 popped_into = number_of_first_bit_set (regs_available_for_popping);
26587 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26588 /*
26589 assert (regs_to_pop == (1 << STACK_POINTER))
26590 assert (pops_needed == 1)
26591 */
26594 /* If necessary restore the a4 register. */
26595 if (restore_a4)
26597 if (reg_containing_return_addr != LR_REGNUM)
26599 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26600 reg_containing_return_addr = LR_REGNUM;
26603 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26606 if (crtl->calls_eh_return)
26607 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26609 /* Return to caller. */
26610 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
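/* For example, with no interworking, no backtrace structure and no
   eh_return, a return address left on the stack is handled by the early
   path above as a single "pop {pc}"; the longer path is only needed when
   the PC cannot be popped directly or when the frame pointer and stack
   pointer must also be restored around a return value held in r0-r3.  */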
26613 /* Scan INSN just before assembler is output for it.
26614 For Thumb-1, we track the status of the condition codes; this
26615 information is used in the cbranchsi4_insn pattern. */
26616 void
26617 thumb1_final_prescan_insn (rtx_insn *insn)
26619 if (flag_print_asm_name)
26620 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26621 INSN_ADDRESSES (INSN_UID (insn)));
26622 /* Don't overwrite the previous setter when we get to a cbranch. */
26623 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26625 enum attr_conds conds;
26627 if (cfun->machine->thumb1_cc_insn)
26629 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26630 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26631 CC_STATUS_INIT;
26633 conds = get_attr_conds (insn);
26634 if (conds == CONDS_SET)
26636 rtx set = single_set (insn);
26637 cfun->machine->thumb1_cc_insn = insn;
26638 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26639 cfun->machine->thumb1_cc_op1 = const0_rtx;
26640 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26641 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26643 rtx src1 = XEXP (SET_SRC (set), 1);
26644 if (src1 == const0_rtx)
26645 cfun->machine->thumb1_cc_mode = CCmode;
26647 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26649 /* Record the src register operand instead of dest because
26650 the cprop_hardreg pass propagates src. */
26651 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26654 else if (conds != CONDS_NOCOND)
26655 cfun->machine->thumb1_cc_insn = NULL_RTX;
26658 /* Check if unexpected far jump is used. */
26659 if (cfun->machine->lr_save_eliminated
26660 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26661 internal_error("Unexpected thumb1 far jump");
26664 int
26665 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26667 unsigned HOST_WIDE_INT mask = 0xff;
26668 int i;
26670 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26671 if (val == 0) /* XXX */
26672 return 0;
26674 for (i = 0; i < 25; i++)
26675 if ((val & (mask << i)) == val)
26676 return 1;
26678 return 0;
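/* E.g. thumb_shiftable_const (0x3fc00) is 1, since 0x3fc00 == 0xff << 10;
   thumb_shiftable_const (0x101) is 0 because its set bits span nine bits and
   cannot fit under the 8-bit mask at any shift.  */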
26681 /* Returns nonzero if the current function contains,
26682 or might contain a far jump. */
26683 static int
26684 thumb_far_jump_used_p (void)
26686 rtx_insn *insn;
26687 bool far_jump = false;
26688 unsigned int func_size = 0;
26690 /* This test is only important for leaf functions. */
26691 /* assert (!leaf_function_p ()); */
26693 /* If we have already decided that far jumps may be used,
26694 do not bother checking again, and always return true even if
26695 it turns out that they are not being used. Once we have made
26696 the decision that far jumps are present (and that hence the link
26697 register will be pushed onto the stack) we cannot go back on it. */
26698 if (cfun->machine->far_jump_used)
26699 return 1;
26701 /* If this function is not being called from the prologue/epilogue
26702 generation code then it must be being called from the
26703 INITIAL_ELIMINATION_OFFSET macro. */
26704 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26706 /* In this case we know that we are being asked about the elimination
26707 of the arg pointer register. If that register is not being used,
26708 then there are no arguments on the stack, and we do not have to
26709 worry that a far jump might force the prologue to push the link
26710 register, changing the stack offsets. In this case we can just
26711 return false, since the presence of far jumps in the function will
26712 not affect stack offsets.
26714 If the arg pointer is live (or if it was live, but has now been
26715 eliminated and so set to dead) then we do have to test to see if
26716 the function might contain a far jump. This test can lead to some
26717 false negatives, since before reload is completed the length of
26718 branch instructions is not known, so gcc defaults to returning their
26719 longest length, which in turn sets the far jump attribute to true.
26721 A false negative will not result in bad code being generated, but it
26722 will result in a needless push and pop of the link register. We
26723 hope that this does not occur too often.
26725 If we need doubleword stack alignment this could affect the other
26726 elimination offsets so we can't risk getting it wrong. */
26727 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26728 cfun->machine->arg_pointer_live = 1;
26729 else if (!cfun->machine->arg_pointer_live)
26730 return 0;
26733 /* We should not change far_jump_used during or after reload, as there is
26734 no chance to change stack frame layout. */
26735 if (reload_in_progress || reload_completed)
26736 return 0;
26738 /* Check to see if the function contains a branch
26739 insn with the far jump attribute set. */
26740 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26742 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26744 far_jump = true;
26746 func_size += get_attr_length (insn);
26749 /* The far_jump attribute will always be true for thumb1 before the
26750 shorten_branch pass, so checking the far_jump attribute before
26751 shorten_branch is not very useful.
26753 The following heuristic tries to estimate more accurately whether a far
26754 jump may eventually be needed. The heuristic is very conservative, as
26755 there is no chance to roll back a decision not to use far jumps.
26757 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26758 that each 2-byte insn is associated with a 4-byte constant pool entry.
26759 Using function size 2048/3 as the threshold is conservative enough. */
26760 if (far_jump)
26762 if ((func_size * 3) >= 2048)
26764 /* Record the fact that we have decided that
26765 the function does use far jumps. */
26766 cfun->machine->far_jump_used = 1;
26767 return 1;
26771 return 0;
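/* A worked example of the heuristic: a function whose insn lengths sum to
   func_size = 700 bytes gives 700 * 3 = 2100 >= 2048, so far_jump_used is
   set and the link register will be saved; with func_size = 600,
   600 * 3 = 1800 < 2048 and the function is, for now, still assumed to need
   no far jumps.  */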
26774 /* Return nonzero if FUNC must be entered in ARM mode. */
26775 int
26776 is_called_in_ARM_mode (tree func)
26778 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26780 /* Ignore the problem about functions whose address is taken. */
26781 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26782 return TRUE;
26784 #ifdef ARM_PE
26785 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26786 #else
26787 return FALSE;
26788 #endif
26791 /* Given the stack offsets and register mask in OFFSETS, decide how
26792 many additional registers to push instead of subtracting a constant
26793 from SP. For epilogues the principle is the same except we use pop.
26794 FOR_PROLOGUE indicates which we're generating. */
26795 static int
26796 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26798 HOST_WIDE_INT amount;
26799 unsigned long live_regs_mask = offsets->saved_regs_mask;
26800 /* Extract a mask of the ones we can give to the Thumb's push/pop
26801 instruction. */
26802 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26803 /* Then count how many other high registers will need to be pushed. */
26804 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26805 int n_free, reg_base, size;
26807 if (!for_prologue && frame_pointer_needed)
26808 amount = offsets->locals_base - offsets->saved_regs;
26809 else
26810 amount = offsets->outgoing_args - offsets->saved_regs;
26812 /* If the stack frame size is 512 exactly, we can save one load
26813 instruction, which should make this a win even when optimizing
26814 for speed. */
26815 if (!optimize_size && amount != 512)
26816 return 0;
26818 /* Can't do this if there are high registers to push. */
26819 if (high_regs_pushed != 0)
26820 return 0;
26822 /* Shouldn't do it in the prologue if no registers would normally
26823 be pushed at all. In the epilogue, also allow it if we'll have
26824 a pop insn for the PC. */
26825 if (l_mask == 0
26826 && (for_prologue
26827 || TARGET_BACKTRACE
26828 || (live_regs_mask & 1 << LR_REGNUM) == 0
26829 || TARGET_INTERWORK
26830 || crtl->args.pretend_args_size != 0))
26831 return 0;
26833 /* Don't do this if thumb_expand_prologue wants to emit instructions
26834 between the push and the stack frame allocation. */
26835 if (for_prologue
26836 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26837 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26838 return 0;
26840 reg_base = 0;
26841 n_free = 0;
26842 if (!for_prologue)
26844 size = arm_size_return_regs ();
26845 reg_base = ARM_NUM_INTS (size);
26846 live_regs_mask >>= reg_base;
26849 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26850 && (for_prologue || call_used_regs[reg_base + n_free]))
26852 live_regs_mask >>= 1;
26853 n_free++;
26856 if (n_free == 0)
26857 return 0;
26858 gcc_assert (amount / 4 * 4 == amount);
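/* A single Thumb-1 SP adjustment can cover at most 508 bytes (a 7-bit
   immediate scaled by 4), so for frames of 512 bytes or more push just enough
   extra registers to bring the remaining adjustment down to 508. */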
26860 if (amount >= 512 && (amount - n_free * 4) < 512)
26861 return (amount - 508) / 4;
26862 if (amount <= n_free * 4)
26863 return amount / 4;
26864 return 0;
26867 /* The bits which aren't usefully expanded as rtl. */
26868 const char *
26869 thumb1_unexpanded_epilogue (void)
26871 arm_stack_offsets *offsets;
26872 int regno;
26873 unsigned long live_regs_mask = 0;
26874 int high_regs_pushed = 0;
26875 int extra_pop;
26876 int had_to_push_lr;
26877 int size;
26879 if (cfun->machine->return_used_this_function != 0)
26880 return "";
26882 if (IS_NAKED (arm_current_func_type ()))
26883 return "";
26885 offsets = arm_get_frame_offsets ();
26886 live_regs_mask = offsets->saved_regs_mask;
26887 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26889 /* If possible, deduce the registers used from the function's return value.
26890 This is more reliable than examining df_regs_ever_live_p () because that
26891 will be set if the register is ever used in the function, not just if
26892 the register is used to hold a return value. */
26893 size = arm_size_return_regs ();
26895 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26896 if (extra_pop > 0)
26898 unsigned long extra_mask = (1 << extra_pop) - 1;
26899 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26902 /* The prologue may have pushed some high registers to use as
26903 work registers. e.g. the testsuite file:
26904 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26905 compiles to produce:
26906 push {r4, r5, r6, r7, lr}
26907 mov r7, r9
26908 mov r6, r8
26909 push {r6, r7}
26910 as part of the prolog. We have to undo that pushing here. */
26912 if (high_regs_pushed)
26914 unsigned long mask = live_regs_mask & 0xff;
26915 int next_hi_reg;
26917 /* The available low registers depend on the size of the value we are
26918 returning. */
26919 if (size <= 12)
26920 mask |= 1 << 3;
26921 if (size <= 8)
26922 mask |= 1 << 2;
26924 if (mask == 0)
26925 /* Oh dear! We have no low registers into which we can pop
26926 high registers! */
26927 internal_error
26928 ("no low registers available for popping high registers");
26930 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26931 if (live_regs_mask & (1 << next_hi_reg))
26932 break;
26934 while (high_regs_pushed)
26936 /* Find lo register(s) into which the high register(s) can
26937 be popped. */
26938 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26940 if (mask & (1 << regno))
26941 high_regs_pushed--;
26942 if (high_regs_pushed == 0)
26943 break;
26946 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26948 /* Pop the values into the low register(s). */
26949 thumb_pop (asm_out_file, mask);
26951 /* Move the value(s) into the high registers. */
26952 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26954 if (mask & (1 << regno))
26956 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26957 regno);
26959 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26960 if (live_regs_mask & (1 << next_hi_reg))
26961 break;
26965 live_regs_mask &= ~0x0f00;
26968 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26969 live_regs_mask &= 0xff;
26971 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26973 /* Pop the return address into the PC. */
26974 if (had_to_push_lr)
26975 live_regs_mask |= 1 << PC_REGNUM;
26977 /* Either no argument registers were pushed or a backtrace
26978 structure was created which includes an adjusted stack
26979 pointer, so just pop everything. */
26980 if (live_regs_mask)
26981 thumb_pop (asm_out_file, live_regs_mask);
26983 /* We have either just popped the return address into the
26984 PC or it was kept in LR for the entire function.
26985 Note that thumb_pop has already called thumb_exit if the
26986 PC was in the list. */
26987 if (!had_to_push_lr)
26988 thumb_exit (asm_out_file, LR_REGNUM);
26990 else
26992 /* Pop everything but the return address. */
26993 if (live_regs_mask)
26994 thumb_pop (asm_out_file, live_regs_mask);
26996 if (had_to_push_lr)
26998 if (size > 12)
27000 /* We have no free low regs, so save one. */
27001 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27002 LAST_ARG_REGNUM);
27005 /* Get the return address into a temporary register. */
27006 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27008 if (size > 12)
27010 /* Move the return address to lr. */
27011 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27012 LAST_ARG_REGNUM);
27013 /* Restore the low register. */
27014 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27015 IP_REGNUM);
27016 regno = LR_REGNUM;
27018 else
27019 regno = LAST_ARG_REGNUM;
27021 else
27022 regno = LR_REGNUM;
27024 /* Remove the argument registers that were pushed onto the stack. */
27025 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27026 SP_REGNUM, SP_REGNUM,
27027 crtl->args.pretend_args_size);
27029 thumb_exit (asm_out_file, regno);
27032 return "";
27035 /* Functions to save and restore machine-specific function data. */
27036 static struct machine_function *
27037 arm_init_machine_status (void)
27039 struct machine_function *machine;
27040 machine = ggc_cleared_alloc<machine_function> ();
27042 #if ARM_FT_UNKNOWN != 0
27043 machine->func_type = ARM_FT_UNKNOWN;
27044 #endif
27045 return machine;
27048 /* Return an RTX indicating where the return address to the
27049 calling function can be found. */
27051 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27053 if (count != 0)
27054 return NULL_RTX;
27056 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27059 /* Do anything needed before RTL is emitted for each function. */
27060 void
27061 arm_init_expanders (void)
27063 /* Arrange to initialize and mark the machine per-function status. */
27064 init_machine_status = arm_init_machine_status;
27066 /* This is to stop the combine pass optimizing away the alignment
27067 adjustment of va_arg. */
27068 /* ??? It is claimed that this should not be necessary. */
27069 if (cfun)
27070 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27074 /* Like arm_compute_initial_elimination_offset. Simpler because there
27075 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27076 to point at the base of the local variables after static stack
27077 space for a function has been allocated. */
27079 HOST_WIDE_INT
27080 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27082 arm_stack_offsets *offsets;
27084 offsets = arm_get_frame_offsets ();
27086 switch (from)
27088 case ARG_POINTER_REGNUM:
27089 switch (to)
27091 case STACK_POINTER_REGNUM:
27092 return offsets->outgoing_args - offsets->saved_args;
27094 case FRAME_POINTER_REGNUM:
27095 return offsets->soft_frame - offsets->saved_args;
27097 case ARM_HARD_FRAME_POINTER_REGNUM:
27098 return offsets->saved_regs - offsets->saved_args;
27100 case THUMB_HARD_FRAME_POINTER_REGNUM:
27101 return offsets->locals_base - offsets->saved_args;
27103 default:
27104 gcc_unreachable ();
27106 break;
27108 case FRAME_POINTER_REGNUM:
27109 switch (to)
27111 case STACK_POINTER_REGNUM:
27112 return offsets->outgoing_args - offsets->soft_frame;
27114 case ARM_HARD_FRAME_POINTER_REGNUM:
27115 return offsets->saved_regs - offsets->soft_frame;
27117 case THUMB_HARD_FRAME_POINTER_REGNUM:
27118 return offsets->locals_base - offsets->soft_frame;
27120 default:
27121 gcc_unreachable ();
27123 break;
27125 default:
27126 gcc_unreachable ();
27130 /* Generate the function's prologue. */
27132 void
27133 thumb1_expand_prologue (void)
27135 rtx_insn *insn;
27137 HOST_WIDE_INT amount;
27138 arm_stack_offsets *offsets;
27139 unsigned long func_type;
27140 int regno;
27141 unsigned long live_regs_mask;
27142 unsigned long l_mask;
27143 unsigned high_regs_pushed = 0;
27145 func_type = arm_current_func_type ();
27147 /* Naked functions don't have prologues. */
27148 if (IS_NAKED (func_type))
27149 return;
27151 if (IS_INTERRUPT (func_type))
27153 error ("interrupt Service Routines cannot be coded in Thumb mode");
27154 return;
27157 if (is_called_in_ARM_mode (current_function_decl))
27158 emit_insn (gen_prologue_thumb1_interwork ());
27160 offsets = arm_get_frame_offsets ();
27161 live_regs_mask = offsets->saved_regs_mask;
27163 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27164 l_mask = live_regs_mask & 0x40ff;
27165 /* Then count how many other high registers will need to be pushed. */
27166 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27168 if (crtl->args.pretend_args_size)
27170 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27172 if (cfun->machine->uses_anonymous_args)
27174 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27175 unsigned long mask;
27177 mask = 1ul << (LAST_ARG_REGNUM + 1);
27178 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
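/* For example, 8 bytes of pretend args gives num_pushes == 2 and a mask
   covering r2 and r3, the last two argument registers. */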
27180 insn = thumb1_emit_multi_reg_push (mask, 0);
27182 else
27184 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27185 stack_pointer_rtx, x));
27187 RTX_FRAME_RELATED_P (insn) = 1;
27190 if (TARGET_BACKTRACE)
27192 HOST_WIDE_INT offset = 0;
27193 unsigned work_register;
27194 rtx work_reg, x, arm_hfp_rtx;
27196 /* We have been asked to create a stack backtrace structure.
27197 The code looks like this:
27199 0 .align 2
27200 0 func:
27201 0 sub SP, #16 Reserve space for 4 registers.
27202 2 push {R7} Push low registers.
27203 4 add R7, SP, #20 Get the stack pointer before the push.
27204 6 str R7, [SP, #8] Store the stack pointer
27205 (before reserving the space).
27206 8 mov R7, PC Get hold of the start of this code + 12.
27207 10 str R7, [SP, #16] Store it.
27208 12 mov R7, FP Get hold of the current frame pointer.
27209 14 str R7, [SP, #4] Store it.
27210 16 mov R7, LR Get hold of the current return address.
27211 18 str R7, [SP, #12] Store it.
27212 20 add R7, SP, #16 Point at the start of the
27213 backtrace structure.
27214 22 mov FP, R7 Put this value into the frame pointer. */
27216 work_register = thumb_find_work_register (live_regs_mask);
27217 work_reg = gen_rtx_REG (SImode, work_register);
27218 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27220 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27221 stack_pointer_rtx, GEN_INT (-16)));
27222 RTX_FRAME_RELATED_P (insn) = 1;
27224 if (l_mask)
27226 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27227 RTX_FRAME_RELATED_P (insn) = 1;
27229 offset = bit_count (l_mask) * UNITS_PER_WORD;
27232 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27233 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27235 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27236 x = gen_frame_mem (SImode, x);
27237 emit_move_insn (x, work_reg);
27239 /* Make sure that the instruction fetching the PC is in the right place
27240 to calculate "start of backtrace creation code + 12". */
27241 /* ??? The stores using the common WORK_REG ought to be enough to
27242 prevent the scheduler from doing anything weird. Failing that
27243 we could always move all of the following into an UNSPEC_VOLATILE. */
27244 if (l_mask)
27246 x = gen_rtx_REG (SImode, PC_REGNUM);
27247 emit_move_insn (work_reg, x);
27249 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27250 x = gen_frame_mem (SImode, x);
27251 emit_move_insn (x, work_reg);
27253 emit_move_insn (work_reg, arm_hfp_rtx);
27255 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27256 x = gen_frame_mem (SImode, x);
27257 emit_move_insn (x, work_reg);
27259 else
27261 emit_move_insn (work_reg, arm_hfp_rtx);
27263 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27264 x = gen_frame_mem (SImode, x);
27265 emit_move_insn (x, work_reg);
27267 x = gen_rtx_REG (SImode, PC_REGNUM);
27268 emit_move_insn (work_reg, x);
27270 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27271 x = gen_frame_mem (SImode, x);
27272 emit_move_insn (x, work_reg);
27275 x = gen_rtx_REG (SImode, LR_REGNUM);
27276 emit_move_insn (work_reg, x);
27278 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27279 x = gen_frame_mem (SImode, x);
27280 emit_move_insn (x, work_reg);
27282 x = GEN_INT (offset + 12);
27283 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27285 emit_move_insn (arm_hfp_rtx, work_reg);
27287 /* Optimization: If we are not pushing any low registers but we are going
27288 to push some high registers then delay our first push. This will just
27289 be a push of LR and we can combine it with the push of the first high
27290 register. */
27291 else if ((l_mask & 0xff) != 0
27292 || (high_regs_pushed == 0 && l_mask))
27294 unsigned long mask = l_mask;
27295 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27296 insn = thumb1_emit_multi_reg_push (mask, mask);
27297 RTX_FRAME_RELATED_P (insn) = 1;
27300 if (high_regs_pushed)
27302 unsigned pushable_regs;
27303 unsigned next_hi_reg;
27304 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27305 : crtl->args.info.nregs;
27306 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27308 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27309 if (live_regs_mask & (1 << next_hi_reg))
27310 break;
27312 /* Here we need to mask out registers used for passing arguments
27313 even if they could otherwise be pushed. This is to avoid using them
27314 to stash the high registers; doing so could clobber argument values. */
27315 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27317 if (pushable_regs == 0)
27318 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27320 while (high_regs_pushed > 0)
27322 unsigned long real_regs_mask = 0;
27324 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27326 if (pushable_regs & (1 << regno))
27328 emit_move_insn (gen_rtx_REG (SImode, regno),
27329 gen_rtx_REG (SImode, next_hi_reg));
27331 high_regs_pushed --;
27332 real_regs_mask |= (1 << next_hi_reg);
27334 if (high_regs_pushed)
27336 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27337 next_hi_reg --)
27338 if (live_regs_mask & (1 << next_hi_reg))
27339 break;
27341 else
27343 pushable_regs &= ~((1 << regno) - 1);
27344 break;
27349 /* If we had to find a work register and we have not yet
27350 saved the LR then add it to the list of regs to push. */
27351 if (l_mask == (1 << LR_REGNUM))
27353 pushable_regs |= l_mask;
27354 real_regs_mask |= l_mask;
27355 l_mask = 0;
27358 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27359 RTX_FRAME_RELATED_P (insn) = 1;
27363 /* Load the pic register before setting the frame pointer,
27364 so we can use r7 as a temporary work register. */
27365 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27366 arm_load_pic_register (live_regs_mask);
27368 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27369 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27370 stack_pointer_rtx);
27372 if (flag_stack_usage_info)
27373 current_function_static_stack_size
27374 = offsets->outgoing_args - offsets->saved_args;
27376 amount = offsets->outgoing_args - offsets->saved_regs;
27377 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27378 if (amount)
27380 if (amount < 512)
27382 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27383 GEN_INT (- amount)));
27384 RTX_FRAME_RELATED_P (insn) = 1;
27386 else
27388 rtx reg, dwarf;
27390 /* The stack decrement is too big for an immediate value in a single
27391 insn. In theory we could issue multiple subtracts, but after
27392 three of them it becomes more space efficient to place the full
27393 value in the constant pool and load into a register. (Also the
27394 ARM debugger really likes to see only one stack decrement per
27395 function). So instead we look for a scratch register into which
27396 we can load the decrement, and then we subtract this from the
27397 stack pointer. Unfortunately on the thumb the only available
27398 scratch registers are the argument registers, and we cannot use
27399 these as they may hold arguments to the function. Instead we
27400 attempt to locate a call preserved register which is used by this
27401 function. If we can find one, then we know that it will have
27402 been pushed at the start of the prologue and so we can corrupt
27403 it now. */
27404 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27405 if (live_regs_mask & (1 << regno))
27406 break;
27408 gcc_assert(regno <= LAST_LO_REGNUM);
27410 reg = gen_rtx_REG (SImode, regno);
27412 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27414 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27415 stack_pointer_rtx, reg));
27417 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27418 plus_constant (Pmode, stack_pointer_rtx,
27419 -amount));
27420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27421 RTX_FRAME_RELATED_P (insn) = 1;
27425 if (frame_pointer_needed)
27426 thumb_set_frame_pointer (offsets);
27428 /* If we are profiling, make sure no instructions are scheduled before
27429 the call to mcount. Similarly if the user has requested no
27430 scheduling in the prologue. Similarly if we want non-call exceptions
27431 using the EABI unwinder, to prevent faulting instructions from being
27432 swapped with a stack adjustment. */
27433 if (crtl->profile || !TARGET_SCHED_PROLOG
27434 || (arm_except_unwind_info (&global_options) == UI_TARGET
27435 && cfun->can_throw_non_call_exceptions))
27436 emit_insn (gen_blockage ());
27438 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27439 if (live_regs_mask & 0xff)
27440 cfun->machine->lr_save_eliminated = 0;
27443 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27444 POP instruction can be generated. LR should be replaced by PC. All
27445 the required checks are already done by USE_RETURN_INSN (). Hence,
27446 all we really need to check here is whether a single register or
27447 multiple registers are to be popped. */
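/* The single-register case is built as a PARALLEL of a return and a
   post-increment load of PC from the stack, which the pattern named above can
   emit as a single pop of PC. */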
27448 void
27449 thumb2_expand_return (bool simple_return)
27451 int i, num_regs;
27452 unsigned long saved_regs_mask;
27453 arm_stack_offsets *offsets;
27455 offsets = arm_get_frame_offsets ();
27456 saved_regs_mask = offsets->saved_regs_mask;
27458 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27459 if (saved_regs_mask & (1 << i))
27460 num_regs++;
27462 if (!simple_return && saved_regs_mask)
27464 if (num_regs == 1)
27466 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27467 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27468 rtx addr = gen_rtx_MEM (SImode,
27469 gen_rtx_POST_INC (SImode,
27470 stack_pointer_rtx));
27471 set_mem_alias_set (addr, get_frame_alias_set ());
27472 XVECEXP (par, 0, 0) = ret_rtx;
27473 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27474 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27475 emit_jump_insn (par);
27477 else
27479 saved_regs_mask &= ~ (1 << LR_REGNUM);
27480 saved_regs_mask |= (1 << PC_REGNUM);
27481 arm_emit_multi_reg_pop (saved_regs_mask);
27484 else
27486 emit_jump_insn (simple_return_rtx);
27490 void
27491 thumb1_expand_epilogue (void)
27493 HOST_WIDE_INT amount;
27494 arm_stack_offsets *offsets;
27495 int regno;
27497 /* Naked functions don't have epilogues. */
27498 if (IS_NAKED (arm_current_func_type ()))
27499 return;
27501 offsets = arm_get_frame_offsets ();
27502 amount = offsets->outgoing_args - offsets->saved_regs;
27504 if (frame_pointer_needed)
27506 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27507 amount = offsets->locals_base - offsets->saved_regs;
27509 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
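/* Registers covered by the extra-pop optimisation (thumb1_extra_regs_pushed)
   are deallocated by the pop itself, so reduce the explicit SP adjustment
   accordingly. */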
27511 gcc_assert (amount >= 0);
27512 if (amount)
27514 emit_insn (gen_blockage ());
27516 if (amount < 512)
27517 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27518 GEN_INT (amount)));
27519 else
27521 /* r3 is always free in the epilogue. */
27522 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27524 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27525 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27529 /* Emit a USE (stack_pointer_rtx), so that
27530 the stack adjustment will not be deleted. */
27531 emit_insn (gen_force_register_use (stack_pointer_rtx));
27533 if (crtl->profile || !TARGET_SCHED_PROLOG)
27534 emit_insn (gen_blockage ());
27536 /* Emit a clobber for each register that will be restored in the epilogue,
27537 so that flow2 will get register lifetimes correct. */
27538 for (regno = 0; regno < 13; regno++)
27539 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27540 emit_clobber (gen_rtx_REG (SImode, regno));
27542 if (! df_regs_ever_live_p (LR_REGNUM))
27543 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27546 /* Epilogue code for APCS frame. */
27547 static void
27548 arm_expand_epilogue_apcs_frame (bool really_return)
27550 unsigned long func_type;
27551 unsigned long saved_regs_mask;
27552 int num_regs = 0;
27553 int i;
27554 int floats_from_frame = 0;
27555 arm_stack_offsets *offsets;
27557 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27558 func_type = arm_current_func_type ();
27560 /* Get frame offsets for ARM. */
27561 offsets = arm_get_frame_offsets ();
27562 saved_regs_mask = offsets->saved_regs_mask;
27564 /* Find the offset of the floating-point save area in the frame. */
27565 floats_from_frame
27566 = (offsets->saved_args
27567 + arm_compute_static_chain_stack_bytes ()
27568 - offsets->frame);
27570 /* Compute how many core registers are saved and how far away the floats are. */
27571 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27572 if (saved_regs_mask & (1 << i))
27574 num_regs++;
27575 floats_from_frame += 4;
27578 if (TARGET_HARD_FLOAT && TARGET_VFP)
27580 int start_reg;
27581 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27583 /* The offset is from IP_REGNUM. */
27584 int saved_size = arm_get_vfp_saved_size ();
27585 if (saved_size > 0)
27587 rtx_insn *insn;
27588 floats_from_frame += saved_size;
27589 insn = emit_insn (gen_addsi3 (ip_rtx,
27590 hard_frame_pointer_rtx,
27591 GEN_INT (-floats_from_frame)));
27592 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27593 ip_rtx, hard_frame_pointer_rtx);
27596 /* Generate VFP register multi-pop. */
27597 start_reg = FIRST_VFP_REGNUM;
27599 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27600 /* Look for a case where a reg does not need restoring. */
27601 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27602 && (!df_regs_ever_live_p (i + 1)
27603 || call_used_regs[i + 1]))
27605 if (start_reg != i)
27606 arm_emit_vfp_multi_reg_pop (start_reg,
27607 (i - start_reg) / 2,
27608 gen_rtx_REG (SImode,
27609 IP_REGNUM));
27610 start_reg = i + 2;
27613 /* Restore the remaining regs that we have discovered (or possibly
27614 even all of them, if the conditional in the for loop never
27615 fired). */
27616 if (start_reg != i)
27617 arm_emit_vfp_multi_reg_pop (start_reg,
27618 (i - start_reg) / 2,
27619 gen_rtx_REG (SImode, IP_REGNUM));
27622 if (TARGET_IWMMXT)
27624 /* The frame pointer is guaranteed to be non-double-word aligned, as
27625 it is set to double-word-aligned old_stack_pointer - 4. */
27626 rtx_insn *insn;
27627 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27629 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27630 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27632 rtx addr = gen_frame_mem (V2SImode,
27633 plus_constant (Pmode, hard_frame_pointer_rtx,
27634 - lrm_count * 4));
27635 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27636 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27637 gen_rtx_REG (V2SImode, i),
27638 NULL_RTX);
27639 lrm_count += 2;
27643 /* saved_regs_mask should contain IP, which holds the old stack pointer
27644 from the time the activation record was created. Since SP and IP are
27645 adjacent registers, we can restore the value directly into SP. */
27646 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27647 saved_regs_mask &= ~(1 << IP_REGNUM);
27648 saved_regs_mask |= (1 << SP_REGNUM);
27650 /* There are two registers left in saved_regs_mask - LR and PC. We
27651 only need to restore LR (the return address), but to
27652 save time we can load it directly into PC, unless we need a
27653 special function exit sequence, or we are not really returning. */
27654 if (really_return
27655 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27656 && !crtl->calls_eh_return)
27657 /* Delete LR from the register mask, so that LR on
27658 the stack is loaded into the PC in the register mask. */
27659 saved_regs_mask &= ~(1 << LR_REGNUM);
27660 else
27661 saved_regs_mask &= ~(1 << PC_REGNUM);
27663 num_regs = bit_count (saved_regs_mask);
27664 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27666 rtx_insn *insn;
27667 emit_insn (gen_blockage ());
27668 /* Unwind the stack to just below the saved registers. */
27669 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27670 hard_frame_pointer_rtx,
27671 GEN_INT (- 4 * num_regs)));
27673 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27674 stack_pointer_rtx, hard_frame_pointer_rtx);
27677 arm_emit_multi_reg_pop (saved_regs_mask);
27679 if (IS_INTERRUPT (func_type))
27681 /* Interrupt handlers will have pushed the
27682 IP onto the stack, so restore it now. */
27683 rtx_insn *insn;
27684 rtx addr = gen_rtx_MEM (SImode,
27685 gen_rtx_POST_INC (SImode,
27686 stack_pointer_rtx));
27687 set_mem_alias_set (addr, get_frame_alias_set ());
27688 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27689 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27690 gen_rtx_REG (SImode, IP_REGNUM),
27691 NULL_RTX);
27694 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27695 return;
27697 if (crtl->calls_eh_return)
27698 emit_insn (gen_addsi3 (stack_pointer_rtx,
27699 stack_pointer_rtx,
27700 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27702 if (IS_STACKALIGN (func_type))
27703 /* Restore the original stack pointer. Before prologue, the stack was
27704 realigned and the original stack pointer saved in r0. For details,
27705 see comment in arm_expand_prologue. */
27706 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27708 emit_jump_insn (simple_return_rtx);
27711 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27712 function is not a sibcall. */
27713 void
27714 arm_expand_epilogue (bool really_return)
27716 unsigned long func_type;
27717 unsigned long saved_regs_mask;
27718 int num_regs = 0;
27719 int i;
27720 int amount;
27721 arm_stack_offsets *offsets;
27723 func_type = arm_current_func_type ();
27725 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27726 let output_return_instruction take care of any instruction emission. */
27727 if (IS_NAKED (func_type)
27728 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27730 if (really_return)
27731 emit_jump_insn (simple_return_rtx);
27732 return;
27735 /* If we are throwing an exception, then we really must be doing a
27736 return, so we can't tail-call. */
27737 gcc_assert (!crtl->calls_eh_return || really_return);
27739 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27741 arm_expand_epilogue_apcs_frame (really_return);
27742 return;
27745 /* Get frame offsets for ARM. */
27746 offsets = arm_get_frame_offsets ();
27747 saved_regs_mask = offsets->saved_regs_mask;
27748 num_regs = bit_count (saved_regs_mask);
27750 if (frame_pointer_needed)
27752 rtx_insn *insn;
27753 /* Restore stack pointer if necessary. */
27754 if (TARGET_ARM)
27756 /* In ARM mode, frame pointer points to first saved register.
27757 Restore stack pointer to last saved register. */
27758 amount = offsets->frame - offsets->saved_regs;
27760 /* Force out any pending memory operations that reference stacked data
27761 before stack de-allocation occurs. */
27762 emit_insn (gen_blockage ());
27763 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27764 hard_frame_pointer_rtx,
27765 GEN_INT (amount)));
27766 arm_add_cfa_adjust_cfa_note (insn, amount,
27767 stack_pointer_rtx,
27768 hard_frame_pointer_rtx);
27770 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27771 deleted. */
27772 emit_insn (gen_force_register_use (stack_pointer_rtx));
27774 else
27776 /* In Thumb-2 mode, the frame pointer points to the last saved
27777 register. */
27778 amount = offsets->locals_base - offsets->saved_regs;
27779 if (amount)
27781 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27782 hard_frame_pointer_rtx,
27783 GEN_INT (amount)));
27784 arm_add_cfa_adjust_cfa_note (insn, amount,
27785 hard_frame_pointer_rtx,
27786 hard_frame_pointer_rtx);
27789 /* Force out any pending memory operations that reference stacked data
27790 before stack de-allocation occurs. */
27791 emit_insn (gen_blockage ());
27792 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27793 hard_frame_pointer_rtx));
27794 arm_add_cfa_adjust_cfa_note (insn, 0,
27795 stack_pointer_rtx,
27796 hard_frame_pointer_rtx);
27797 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27798 deleted. */
27799 emit_insn (gen_force_register_use (stack_pointer_rtx));
27802 else
27804 /* Pop off outgoing args and local frame to adjust stack pointer to
27805 last saved register. */
27806 amount = offsets->outgoing_args - offsets->saved_regs;
27807 if (amount)
27809 rtx_insn *tmp;
27810 /* Force out any pending memory operations that reference stacked data
27811 before stack de-allocation occurs. */
27812 emit_insn (gen_blockage ());
27813 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27814 stack_pointer_rtx,
27815 GEN_INT (amount)));
27816 arm_add_cfa_adjust_cfa_note (tmp, amount,
27817 stack_pointer_rtx, stack_pointer_rtx);
27818 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27819 not deleted. */
27820 emit_insn (gen_force_register_use (stack_pointer_rtx));
27824 if (TARGET_HARD_FLOAT && TARGET_VFP)
27826 /* Generate VFP register multi-pop. */
27827 int end_reg = LAST_VFP_REGNUM + 1;
27829 /* Scan the registers in reverse order. We need to match
27830 any groupings made in the prologue and generate matching
27831 vldm operations. The need to match groups is because,
27832 unlike pop, vldm can only do consecutive regs. */
27833 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27834 /* Look for a case where a reg does not need restoring. */
27835 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27836 && (!df_regs_ever_live_p (i + 1)
27837 || call_used_regs[i + 1]))
27839 /* Restore the regs discovered so far (from reg+2 to
27840 end_reg). */
27841 if (end_reg > i + 2)
27842 arm_emit_vfp_multi_reg_pop (i + 2,
27843 (end_reg - (i + 2)) / 2,
27844 stack_pointer_rtx);
27845 end_reg = i;
27848 /* Restore the remaining regs that we have discovered (or possibly
27849 even all of them, if the conditional in the for loop never
27850 fired). */
27851 if (end_reg > i + 2)
27852 arm_emit_vfp_multi_reg_pop (i + 2,
27853 (end_reg - (i + 2)) / 2,
27854 stack_pointer_rtx);
27857 if (TARGET_IWMMXT)
27858 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27859 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27861 rtx_insn *insn;
27862 rtx addr = gen_rtx_MEM (V2SImode,
27863 gen_rtx_POST_INC (SImode,
27864 stack_pointer_rtx));
27865 set_mem_alias_set (addr, get_frame_alias_set ());
27866 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27867 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27868 gen_rtx_REG (V2SImode, i),
27869 NULL_RTX);
27870 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27871 stack_pointer_rtx, stack_pointer_rtx);
27874 if (saved_regs_mask)
27876 rtx insn;
27877 bool return_in_pc = false;
27879 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27880 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27881 && !IS_STACKALIGN (func_type)
27882 && really_return
27883 && crtl->args.pretend_args_size == 0
27884 && saved_regs_mask & (1 << LR_REGNUM)
27885 && !crtl->calls_eh_return)
27887 saved_regs_mask &= ~(1 << LR_REGNUM);
27888 saved_regs_mask |= (1 << PC_REGNUM);
27889 return_in_pc = true;
27892 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27894 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27895 if (saved_regs_mask & (1 << i))
27897 rtx addr = gen_rtx_MEM (SImode,
27898 gen_rtx_POST_INC (SImode,
27899 stack_pointer_rtx));
27900 set_mem_alias_set (addr, get_frame_alias_set ());
27902 if (i == PC_REGNUM)
27904 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27905 XVECEXP (insn, 0, 0) = ret_rtx;
27906 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27907 gen_rtx_REG (SImode, i),
27908 addr);
27909 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27910 insn = emit_jump_insn (insn);
27912 else
27914 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27915 addr));
27916 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27917 gen_rtx_REG (SImode, i),
27918 NULL_RTX);
27919 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27920 stack_pointer_rtx,
27921 stack_pointer_rtx);
27925 else
27927 if (TARGET_LDRD
27928 && current_tune->prefer_ldrd_strd
27929 && !optimize_function_for_size_p (cfun))
27931 if (TARGET_THUMB2)
27932 thumb2_emit_ldrd_pop (saved_regs_mask);
27933 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27934 arm_emit_ldrd_pop (saved_regs_mask);
27935 else
27936 arm_emit_multi_reg_pop (saved_regs_mask);
27938 else
27939 arm_emit_multi_reg_pop (saved_regs_mask);
27942 if (return_in_pc == true)
27943 return;
27946 if (crtl->args.pretend_args_size)
27948 int i, j;
27949 rtx dwarf = NULL_RTX;
27950 rtx_insn *tmp =
27951 emit_insn (gen_addsi3 (stack_pointer_rtx,
27952 stack_pointer_rtx,
27953 GEN_INT (crtl->args.pretend_args_size)));
27955 RTX_FRAME_RELATED_P (tmp) = 1;
27957 if (cfun->machine->uses_anonymous_args)
27959 /* Restore pretend args. Refer to arm_expand_prologue for how pretend
27960 args are saved on the stack. */
27961 int num_regs = crtl->args.pretend_args_size / 4;
27962 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
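/* E.g. with 8 bytes of pretend args, num_regs is 2 and the mask becomes
   0xc (r2 and r3), matching the registers the prologue pushed. */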
27963 for (j = 0, i = 0; j < num_regs; i++)
27964 if (saved_regs_mask & (1 << i))
27966 rtx reg = gen_rtx_REG (SImode, i);
27967 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27968 j++;
27970 REG_NOTES (tmp) = dwarf;
27972 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27973 stack_pointer_rtx, stack_pointer_rtx);
27976 if (!really_return)
27977 return;
27979 if (crtl->calls_eh_return)
27980 emit_insn (gen_addsi3 (stack_pointer_rtx,
27981 stack_pointer_rtx,
27982 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27984 if (IS_STACKALIGN (func_type))
27985 /* Restore the original stack pointer. Before prologue, the stack was
27986 realigned and the original stack pointer saved in r0. For details,
27987 see comment in arm_expand_prologue. */
27988 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27990 emit_jump_insn (simple_return_rtx);
27993 /* Implementation of insn prologue_thumb1_interwork. This is the first
27994 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
27996 const char *
27997 thumb1_output_interwork (void)
27999 const char * name;
28000 FILE *f = asm_out_file;
28002 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28003 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28004 == SYMBOL_REF);
28005 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28007 /* Generate code sequence to switch us into Thumb mode. */
28008 /* The .code 32 directive has already been emitted by
28009 ASM_DECLARE_FUNCTION_NAME. */
28010 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28011 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28013 /* Generate a label, so that the debugger will notice the
28014 change in instruction sets. This label is also used by
28015 the assembler to bypass the ARM code when this function
28016 is called from a Thumb encoded function elsewhere in the
28017 same file. Hence the definition of STUB_NAME here must
28018 agree with the definition in gas/config/tc-arm.c. */
28020 #define STUB_NAME ".real_start_of"
28022 fprintf (f, "\t.code\t16\n");
28023 #ifdef ARM_PE
28024 if (arm_dllexport_name_p (name))
28025 name = arm_strip_name_encoding (name);
28026 #endif
28027 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28028 fprintf (f, "\t.thumb_func\n");
28029 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28031 return "";
28034 /* Handle the case of a double word load into a low register from
28035 a computed memory address. The computed address may involve a
28036 register which is overwritten by the load. */
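/* Illustrative example: loading a doubleword at r2 + r4 into the r2/r3 pair
   emits "add r3, r2, r4", "ldr r2, [r3, #0]", "ldr r3, [r3, #4]", so the
   computed address survives in the high half until it is overwritten last. */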
28037 const char *
28038 thumb_load_double_from_address (rtx *operands)
28040 rtx addr;
28041 rtx base;
28042 rtx offset;
28043 rtx arg1;
28044 rtx arg2;
28046 gcc_assert (REG_P (operands[0]));
28047 gcc_assert (MEM_P (operands[1]));
28049 /* Get the memory address. */
28050 addr = XEXP (operands[1], 0);
28052 /* Work out how the memory address is computed. */
28053 switch (GET_CODE (addr))
28055 case REG:
28056 operands[2] = adjust_address (operands[1], SImode, 4);
28058 if (REGNO (operands[0]) == REGNO (addr))
28060 output_asm_insn ("ldr\t%H0, %2", operands);
28061 output_asm_insn ("ldr\t%0, %1", operands);
28063 else
28065 output_asm_insn ("ldr\t%0, %1", operands);
28066 output_asm_insn ("ldr\t%H0, %2", operands);
28068 break;
28070 case CONST:
28071 /* Compute <address> + 4 for the high order load. */
28072 operands[2] = adjust_address (operands[1], SImode, 4);
28074 output_asm_insn ("ldr\t%0, %1", operands);
28075 output_asm_insn ("ldr\t%H0, %2", operands);
28076 break;
28078 case PLUS:
28079 arg1 = XEXP (addr, 0);
28080 arg2 = XEXP (addr, 1);
28082 if (CONSTANT_P (arg1))
28083 base = arg2, offset = arg1;
28084 else
28085 base = arg1, offset = arg2;
28087 gcc_assert (REG_P (base));
28089 /* Catch the case of <address> = <reg> + <reg> */
28090 if (REG_P (offset))
28092 int reg_offset = REGNO (offset);
28093 int reg_base = REGNO (base);
28094 int reg_dest = REGNO (operands[0]);
28096 /* Add the base and offset registers together into the
28097 higher destination register. */
28098 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28099 reg_dest + 1, reg_base, reg_offset);
28101 /* Load the lower destination register from the address in
28102 the higher destination register. */
28103 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28104 reg_dest, reg_dest + 1);
28106 /* Load the higher destination register from its own address
28107 plus 4. */
28108 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28109 reg_dest + 1, reg_dest + 1);
28111 else
28113 /* Compute <address> + 4 for the high order load. */
28114 operands[2] = adjust_address (operands[1], SImode, 4);
28116 /* If the computed address is held in the low order register
28117 then load the high order register first, otherwise always
28118 load the low order register first. */
28119 if (REGNO (operands[0]) == REGNO (base))
28121 output_asm_insn ("ldr\t%H0, %2", operands);
28122 output_asm_insn ("ldr\t%0, %1", operands);
28124 else
28126 output_asm_insn ("ldr\t%0, %1", operands);
28127 output_asm_insn ("ldr\t%H0, %2", operands);
28130 break;
28132 case LABEL_REF:
28133 /* With no registers to worry about we can just load the value
28134 directly. */
28135 operands[2] = adjust_address (operands[1], SImode, 4);
28137 output_asm_insn ("ldr\t%H0, %2", operands);
28138 output_asm_insn ("ldr\t%0, %1", operands);
28139 break;
28141 default:
28142 gcc_unreachable ();
28145 return "";
28148 const char *
28149 thumb_output_move_mem_multiple (int n, rtx *operands)
28151 rtx tmp;
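/* The register lists for ldmia/stmia must be in ascending register order, so
   sort the scratch register operands before printing them. */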
28153 switch (n)
28155 case 2:
28156 if (REGNO (operands[4]) > REGNO (operands[5]))
28158 tmp = operands[4];
28159 operands[4] = operands[5];
28160 operands[5] = tmp;
28162 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28163 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28164 break;
28166 case 3:
28167 if (REGNO (operands[4]) > REGNO (operands[5]))
28169 tmp = operands[4];
28170 operands[4] = operands[5];
28171 operands[5] = tmp;
28173 if (REGNO (operands[5]) > REGNO (operands[6]))
28175 tmp = operands[5];
28176 operands[5] = operands[6];
28177 operands[6] = tmp;
28179 if (REGNO (operands[4]) > REGNO (operands[5]))
28181 tmp = operands[4];
28182 operands[4] = operands[5];
28183 operands[5] = tmp;
28186 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28187 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28188 break;
28190 default:
28191 gcc_unreachable ();
28194 return "";
28197 /* Output a call-via instruction for thumb state. */
28198 const char *
28199 thumb_call_via_reg (rtx reg)
28201 int regno = REGNO (reg);
28202 rtx *labelp;
28204 gcc_assert (regno < LR_REGNUM);
28206 /* If we are in the normal text section we can use a single instance
28207 per compilation unit. If we are doing function sections, then we need
28208 an entry per section, since we can't rely on reachability. */
28209 if (in_section == text_section)
28211 thumb_call_reg_needed = 1;
28213 if (thumb_call_via_label[regno] == NULL)
28214 thumb_call_via_label[regno] = gen_label_rtx ();
28215 labelp = thumb_call_via_label + regno;
28217 else
28219 if (cfun->machine->call_via[regno] == NULL)
28220 cfun->machine->call_via[regno] = gen_label_rtx ();
28221 labelp = cfun->machine->call_via + regno;
28224 output_asm_insn ("bl\t%a0", labelp);
28225 return "";
28228 /* Routines for generating rtl. */
28229 void
28230 thumb_expand_movmemqi (rtx *operands)
28232 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28233 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28234 HOST_WIDE_INT len = INTVAL (operands[2]);
28235 HOST_WIDE_INT offset = 0;
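/* Copy the bulk in 12-byte and 8-byte ldm/stm chunks (which also advance the
   pointers), then mop up the remainder with at most one word, one halfword
   and one byte move. */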
28237 while (len >= 12)
28239 emit_insn (gen_movmem12b (out, in, out, in));
28240 len -= 12;
28243 if (len >= 8)
28245 emit_insn (gen_movmem8b (out, in, out, in));
28246 len -= 8;
28249 if (len >= 4)
28251 rtx reg = gen_reg_rtx (SImode);
28252 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28253 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28254 len -= 4;
28255 offset += 4;
28258 if (len >= 2)
28260 rtx reg = gen_reg_rtx (HImode);
28261 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28262 plus_constant (Pmode, in,
28263 offset))));
28264 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28265 offset)),
28266 reg));
28267 len -= 2;
28268 offset += 2;
28271 if (len)
28273 rtx reg = gen_reg_rtx (QImode);
28274 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28275 plus_constant (Pmode, in,
28276 offset))));
28277 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28278 offset)),
28279 reg));
28283 void
28284 thumb_reload_out_hi (rtx *operands)
28286 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28289 /* Handle reading a half-word from memory during reload. */
28290 void
28291 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28293 gcc_unreachable ();
28296 /* Return the length of a function name prefix
28297 that starts with the character 'c'. */
28298 static int
28299 arm_get_strip_length (int c)
28301 switch (c)
28303 ARM_NAME_ENCODING_LENGTHS
28304 default: return 0;
28308 /* Return a pointer to a function's name with any
28309 and all prefix encodings stripped from it. */
28310 const char *
28311 arm_strip_name_encoding (const char *name)
28313 int skip;
28315 while ((skip = arm_get_strip_length (* name)))
28316 name += skip;
28318 return name;
28321 /* If there is a '*' anywhere in the name's prefix, then
28322 emit the stripped name verbatim, otherwise prepend an
28323 underscore if leading underscores are being used. */
28324 void
28325 arm_asm_output_labelref (FILE *stream, const char *name)
28327 int skip;
28328 int verbatim = 0;
28330 while ((skip = arm_get_strip_length (* name)))
28332 verbatim |= (*name == '*');
28333 name += skip;
28336 if (verbatim)
28337 fputs (name, stream);
28338 else
28339 asm_fprintf (stream, "%U%s", name);
28342 /* This function is used to emit an EABI tag and its associated value.
28343 We emit the numerical value of the tag in case the assembler does not
28344 support textual tags (e.g. gas prior to 2.20). If requested we include
28345 the tag name in a comment so that anyone reading the assembler output
28346 will know which tag is being set.
28348 This function is not static because arm-c.c needs it too. */
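/* For example, the Tag_ABI_optimization_goals call below produces a line such
   as ".eabi_attribute 30, 2", with "@ Tag_ABI_optimization_goals" appended
   when -fverbose-asm or -dA is in effect. */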
28350 void
28351 arm_emit_eabi_attribute (const char *name, int num, int val)
28353 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28354 if (flag_verbose_asm || flag_debug_asm)
28355 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28356 asm_fprintf (asm_out_file, "\n");
28359 static void
28360 arm_file_start (void)
28362 int val;
28364 if (TARGET_UNIFIED_ASM)
28365 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28367 if (TARGET_BPABI)
28369 const char *fpu_name;
28370 if (arm_selected_arch)
28372 /* armv7ve doesn't support any extensions. */
28373 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28375 /* Keep backward compatibility for assemblers
28376 which don't support armv7ve. */
28377 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28378 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28379 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28380 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28381 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28383 else
28385 const char* pos = strchr (arm_selected_arch->name, '+');
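/* An architecture name containing '+' (e.g. "armv8-a+crc", used here purely
   as an illustration) is split at the '+': the base name goes into .arch and
   the extension into .arch_extension. */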
28386 if (pos)
28388 char buf[15];
28389 gcc_assert (strlen (arm_selected_arch->name)
28390 <= sizeof (buf) / sizeof (*pos));
28391 strncpy (buf, arm_selected_arch->name,
28392 (pos - arm_selected_arch->name) * sizeof (*pos));
28393 buf[pos - arm_selected_arch->name] = '\0';
28394 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28395 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28397 else
28398 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28401 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28402 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28403 else
28405 const char* truncated_name
28406 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28407 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28410 if (TARGET_SOFT_FLOAT)
28412 fpu_name = "softvfp";
28414 else
28416 fpu_name = arm_fpu_desc->name;
28417 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28419 if (TARGET_HARD_FLOAT)
28420 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28421 if (TARGET_HARD_FLOAT_ABI)
28422 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28425 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28427 /* Some of these attributes only apply when the corresponding features
28428 are used. However we don't have any easy way of figuring this out.
28429 Conservatively record the setting that would have been used. */
28431 if (flag_rounding_math)
28432 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28434 if (!flag_unsafe_math_optimizations)
28436 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28437 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28439 if (flag_signaling_nans)
28440 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28442 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28443 flag_finite_math_only ? 1 : 3);
28445 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28446 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28447 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28448 flag_short_enums ? 1 : 2);
28450 /* Tag_ABI_optimization_goals. */
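/* The values below follow the Tag_ABI_optimization_goals encoding in the ARM
   EABI addenda: 4 = aggressively for size, 2 = aggressively for speed,
   1 = for speed, 6 = aggressively for debugging. */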
28451 if (optimize_size)
28452 val = 4;
28453 else if (optimize >= 2)
28454 val = 2;
28455 else if (optimize)
28456 val = 1;
28457 else
28458 val = 6;
28459 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28461 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28462 unaligned_access);
28464 if (arm_fp16_format)
28465 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28466 (int) arm_fp16_format);
28468 if (arm_lang_output_object_attributes_hook)
28469 arm_lang_output_object_attributes_hook();
28472 default_file_start ();
28475 static void
28476 arm_file_end (void)
28478 int regno;
28480 if (NEED_INDICATE_EXEC_STACK)
28481 /* Add .note.GNU-stack. */
28482 file_end_indicate_exec_stack ();
28484 if (! thumb_call_reg_needed)
28485 return;
28487 switch_to_section (text_section);
28488 asm_fprintf (asm_out_file, "\t.code 16\n");
28489 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28491 for (regno = 0; regno < LR_REGNUM; regno++)
28493 rtx label = thumb_call_via_label[regno];
28495 if (label != 0)
28497 targetm.asm_out.internal_label (asm_out_file, "L",
28498 CODE_LABEL_NUMBER (label));
28499 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28504 #ifndef ARM_PE
28505 /* Symbols in the text segment can be accessed without indirecting via the
28506 constant pool; it may take an extra binary operation, but this is still
28507 faster than indirecting via memory. Don't do this when not optimizing,
28508 since we won't be calculating all of the offsets necessary to do this
28509 simplification. */
28511 static void
28512 arm_encode_section_info (tree decl, rtx rtl, int first)
28514 if (optimize > 0 && TREE_CONSTANT (decl))
28515 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28517 default_encode_section_info (decl, rtl, first);
28519 #endif /* !ARM_PE */
28521 static void
28522 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28524 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28525 && !strcmp (prefix, "L"))
28527 arm_ccfsm_state = 0;
28528 arm_target_insn = NULL;
28530 default_internal_label (stream, prefix, labelno);
28533 /* Output code to add DELTA to the first argument, and then jump
28534 to FUNCTION. Used for C++ multiple inheritance. */
28535 static void
28536 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28537 HOST_WIDE_INT delta,
28538 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28539 tree function)
28541 static int thunk_label = 0;
28542 char label[256];
28543 char labelpc[256];
28544 int mi_delta = delta;
28545 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28546 int shift = 0;
28547 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28548 ? 1 : 0);
28549 if (mi_delta < 0)
28550 mi_delta = - mi_delta;
28552 final_start_function (emit_barrier (), file, 1);
28554 if (TARGET_THUMB1)
28556 int labelno = thunk_label++;
28557 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28558 /* Thunks are entered in ARM mode when available. */
28559 if (TARGET_THUMB1_ONLY)
28561 /* push r3 so we can use it as a temporary. */
28562 /* TODO: Omit this save if r3 is not used. */
28563 fputs ("\tpush {r3}\n", file);
28564 fputs ("\tldr\tr3, ", file);
28566 else
28568 fputs ("\tldr\tr12, ", file);
28570 assemble_name (file, label);
28571 fputc ('\n', file);
28572 if (flag_pic)
28574 /* If we are generating PIC, the ldr instruction below loads
28575 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28576 the address of the add + 8, so we have:
28578 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28579 = target + 1.
28581 Note that we have "+ 1" because some versions of GNU ld
28582 don't set the low bit of the result for R_ARM_REL32
28583 relocations against thumb function symbols.
28584 On ARMv6M this is +4, not +8. */
28585 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28586 assemble_name (file, labelpc);
28587 fputs (":\n", file);
28588 if (TARGET_THUMB1_ONLY)
28590 /* This is 2 insns after the start of the thunk, so we know it
28591 is 4-byte aligned. */
28592 fputs ("\tadd\tr3, pc, r3\n", file);
28593 fputs ("\tmov r12, r3\n", file);
28595 else
28596 fputs ("\tadd\tr12, pc, r12\n", file);
28598 else if (TARGET_THUMB1_ONLY)
28599 fputs ("\tmov r12, r3\n", file);
28601 if (TARGET_THUMB1_ONLY)
28603 if (mi_delta > 255)
28605 fputs ("\tldr\tr3, ", file);
28606 assemble_name (file, label);
28607 fputs ("+4\n", file);
28608 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28609 mi_op, this_regno, this_regno);
28611 else if (mi_delta != 0)
28613 /* Thumb1 unified syntax requires the s suffix in the instruction name
28614 when one of the operands is an immediate. */
28615 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28616 mi_op, this_regno, this_regno,
28617 mi_delta);
28620 else
28622 /* TODO: Use movw/movt for large constants when available. */
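/* For example (assuming "this" is in r0), a delta of 1025 is emitted as
   "add r0, r0, #1" followed by "add r0, r0, #1024". */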
28623 while (mi_delta != 0)
28625 if ((mi_delta & (3 << shift)) == 0)
28626 shift += 2;
28627 else
28629 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28630 mi_op, this_regno, this_regno,
28631 mi_delta & (0xff << shift));
28632 mi_delta &= ~(0xff << shift);
28633 shift += 8;
28637 if (TARGET_THUMB1)
28639 if (TARGET_THUMB1_ONLY)
28640 fputs ("\tpop\t{r3}\n", file);
28642 fprintf (file, "\tbx\tr12\n");
28643 ASM_OUTPUT_ALIGN (file, 2);
28644 assemble_name (file, label);
28645 fputs (":\n", file);
28646 if (flag_pic)
28648 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28649 rtx tem = XEXP (DECL_RTL (function), 0);
28650 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28651 pipeline offset is four rather than eight. Adjust the offset
28652 accordingly. */
28653 tem = plus_constant (GET_MODE (tem), tem,
28654 TARGET_THUMB1_ONLY ? -3 : -7);
28655 tem = gen_rtx_MINUS (GET_MODE (tem),
28656 tem,
28657 gen_rtx_SYMBOL_REF (Pmode,
28658 ggc_strdup (labelpc)));
28659 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28661 else
28662 /* Output ".word .LTHUNKn". */
28663 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28665 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28666 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28668 else
28670 fputs ("\tb\t", file);
28671 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28672 if (NEED_PLT_RELOC)
28673 fputs ("(PLT)", file);
28674 fputc ('\n', file);
28677 final_end_function ();
28681 arm_emit_vector_const (FILE *file, rtx x)
28683 int i;
28684 const char * pattern;
28686 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28688 switch (GET_MODE (x))
28690 case V2SImode: pattern = "%08x"; break;
28691 case V4HImode: pattern = "%04x"; break;
28692 case V8QImode: pattern = "%02x"; break;
28693 default: gcc_unreachable ();
28696 fprintf (file, "0x");
28697 for (i = CONST_VECTOR_NUNITS (x); i--;)
28699 rtx element;
28701 element = CONST_VECTOR_ELT (x, i);
28702 fprintf (file, pattern, INTVAL (element));
28705 return 1;
28708 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28709 HFmode constant pool entries are actually loaded with ldr. */
28710 void
28711 arm_emit_fp16_const (rtx c)
28713 REAL_VALUE_TYPE r;
28714 long bits;
28716 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28717 bits = real_to_target (NULL, &r, HFmode);
28718 if (WORDS_BIG_ENDIAN)
28719 assemble_zeros (2);
28720 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28721 if (!WORDS_BIG_ENDIAN)
28722 assemble_zeros (2);
28725 const char *
28726 arm_output_load_gr (rtx *operands)
28728 rtx reg;
28729 rtx offset;
28730 rtx wcgr;
28731 rtx sum;
28733 if (!MEM_P (operands [1])
28734 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28735 || !REG_P (reg = XEXP (sum, 0))
28736 || !CONST_INT_P (offset = XEXP (sum, 1))
28737 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28738 return "wldrw%?\t%0, %1";
28740 /* Fix up an out-of-range load of a GR register. */
28741 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28742 wcgr = operands[0];
28743 operands[0] = reg;
28744 output_asm_insn ("ldr%?\t%0, %1", operands);
28746 operands[0] = wcgr;
28747 operands[1] = reg;
28748 output_asm_insn ("tmcr%?\t%0, %1", operands);
28749 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28751 return "";
28754 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28756 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28757 named arg and all anonymous args onto the stack.
28758 XXX I know the prologue shouldn't be pushing registers, but it is faster
28759 that way. */
28761 static void
28762 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28763 machine_mode mode,
28764 tree type,
28765 int *pretend_size,
28766 int second_time ATTRIBUTE_UNUSED)
28768 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28769 int nregs;
28771 cfun->machine->uses_anonymous_args = 1;
28772 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28774 nregs = pcum->aapcs_ncrn;
28775 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28776 nregs++;
28778 else
28779 nregs = pcum->nregs;
28781 if (nregs < NUM_ARG_REGS)
28782 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28785 /* We can't rely on the caller doing the proper promotion when
28786 using APCS or ATPCS. */
28788 static bool
28789 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28791 return !TARGET_AAPCS_BASED;
28794 static machine_mode
28795 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28796 machine_mode mode,
28797 int *punsignedp ATTRIBUTE_UNUSED,
28798 const_tree fntype ATTRIBUTE_UNUSED,
28799 int for_return ATTRIBUTE_UNUSED)
28801 if (GET_MODE_CLASS (mode) == MODE_INT
28802 && GET_MODE_SIZE (mode) < 4)
28803 return SImode;
28805 return mode;
28808 /* AAPCS based ABIs use short enums by default. */
28810 static bool
28811 arm_default_short_enums (void)
28813 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28817 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28819 static bool
28820 arm_align_anon_bitfield (void)
28822 return TARGET_AAPCS_BASED;
28826 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28828 static tree
28829 arm_cxx_guard_type (void)
28831 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28835 /* The EABI says test the least significant bit of a guard variable. */
28837 static bool
28838 arm_cxx_guard_mask_bit (void)
28840 return TARGET_AAPCS_BASED;
28844 /* The EABI specifies that all array cookies are 8 bytes long. */
28846 static tree
28847 arm_get_cookie_size (tree type)
28849 tree size;
28851 if (!TARGET_AAPCS_BASED)
28852 return default_cxx_get_cookie_size (type);
28854 size = build_int_cst (sizetype, 8);
28855 return size;
28859 /* The EABI says that array cookies should also contain the element size. */
28861 static bool
28862 arm_cookie_has_size (void)
28864 return TARGET_AAPCS_BASED;
28868 /* The EABI says constructors and destructors should return a pointer to
28869 the object constructed/destroyed. */
28871 static bool
28872 arm_cxx_cdtor_returns_this (void)
28874 return TARGET_AAPCS_BASED;
28877 /* The EABI says that an inline function may never be the key
28878 method. */
28880 static bool
28881 arm_cxx_key_method_may_be_inline (void)
28883 return !TARGET_AAPCS_BASED;
28886 static void
28887 arm_cxx_determine_class_data_visibility (tree decl)
28889 if (!TARGET_AAPCS_BASED
28890 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28891 return;
28893 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28894 is exported. However, on systems without dynamic vague linkage,
28895 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28896 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28897 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28898 else
28899 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28900 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28903 static bool
28904 arm_cxx_class_data_always_comdat (void)
28906 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28907 vague linkage if the class has no key function. */
28908 return !TARGET_AAPCS_BASED;
28912 /* The EABI says __aeabi_atexit should be used to register static
28913 destructors. */
28915 static bool
28916 arm_cxx_use_aeabi_atexit (void)
28918 return TARGET_AAPCS_BASED;
28922 void
28923 arm_set_return_address (rtx source, rtx scratch)
28925 arm_stack_offsets *offsets;
28926 HOST_WIDE_INT delta;
28927 rtx addr;
28928 unsigned long saved_regs;
28930 offsets = arm_get_frame_offsets ();
28931 saved_regs = offsets->saved_regs_mask;
28933 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28934 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28935 else
28937 if (frame_pointer_needed)
28938 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28939 else
28941 /* LR will be the first saved register. */
28942 delta = offsets->outgoing_args - (offsets->frame + 4);
28945 if (delta >= 4096)
28947 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28948 GEN_INT (delta & ~4095)));
28949 addr = scratch;
28950 delta &= 4095;
28952 else
28953 addr = stack_pointer_rtx;
28955 addr = plus_constant (Pmode, addr, delta);
28957 /* The store needs to be marked as frame related in order to prevent
28958 DSE from deleting it as dead if it is based on fp. */
28959 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
28960 RTX_FRAME_RELATED_P (insn) = 1;
28961 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
28966 void
28967 thumb_set_return_address (rtx source, rtx scratch)
28969 arm_stack_offsets *offsets;
28970 HOST_WIDE_INT delta;
28971 HOST_WIDE_INT limit;
28972 int reg;
28973 rtx addr;
28974 unsigned long mask;
28976 emit_use (source);
28978 offsets = arm_get_frame_offsets ();
28979 mask = offsets->saved_regs_mask;
28980 if (mask & (1 << LR_REGNUM))
28982 limit = 1024;
28983 /* Find the saved regs. */
28984 if (frame_pointer_needed)
28986 delta = offsets->soft_frame - offsets->saved_args;
28987 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28988 if (TARGET_THUMB1)
28989 limit = 128;
28991 else
28993 delta = offsets->outgoing_args - offsets->saved_args;
28994 reg = SP_REGNUM;
28996 /* Allow for the stack frame. */
28997 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28998 delta -= 16;
28999 /* The link register is always the first saved register. */
29000 delta -= 4;
29002 /* Construct the address. */
29003 addr = gen_rtx_REG (SImode, reg);
29004 if (delta > limit)
29006 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29007 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29008 addr = scratch;
29010 else
29011 addr = plus_constant (Pmode, addr, delta);
29013 /* The store needs to be marked as frame related in order to prevent
29014 DSE from deleting it as dead if it is based on fp. */
29015 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29016 RTX_FRAME_RELATED_P (insn) = 1;
29017 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29019 else
29020 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29023 /* Implements target hook vector_mode_supported_p. */
29024 bool
29025 arm_vector_mode_supported_p (machine_mode mode)
29027 /* Neon also supports V2SImode, etc. listed in the clause below. */
29028 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29029 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29030 return true;
29032 if ((TARGET_NEON || TARGET_IWMMXT)
29033 && ((mode == V2SImode)
29034 || (mode == V4HImode)
29035 || (mode == V8QImode)))
29036 return true;
29038 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29039 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29040 || mode == V2HAmode))
29041 return true;
29043 return false;
29046 /* Implements target hook array_mode_supported_p. */
29048 static bool
29049 arm_array_mode_supported_p (machine_mode mode,
29050 unsigned HOST_WIDE_INT nelems)
29052 if (TARGET_NEON
29053 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29054 && (nelems >= 2 && nelems <= 4))
29055 return true;
29057 return false;
29060 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29061 registers when autovectorizing for Neon, at least until multiple vector
29062 widths are supported properly by the middle-end. */
29064 static machine_mode
29065 arm_preferred_simd_mode (machine_mode mode)
29067 if (TARGET_NEON)
29068 switch (mode)
29070 case SFmode:
29071 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29072 case SImode:
29073 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29074 case HImode:
29075 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29076 case QImode:
29077 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29078 case DImode:
29079 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29080 return V2DImode;
29081 break;
29083 default:;
29086 if (TARGET_REALLY_IWMMXT)
29087 switch (mode)
29089 case SImode:
29090 return V2SImode;
29091 case HImode:
29092 return V4HImode;
29093 case QImode:
29094 return V8QImode;
29096 default:;
29099 return word_mode;
29102 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29104 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29105 using r0-r4 for function arguments, r7 for the stack frame and don't have
29106 enough left over to do doubleword arithmetic. For Thumb-2 all the
29107 potentially problematic instructions accept high registers so this is not
29108 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29109 that require many low registers. */
29110 static bool
29111 arm_class_likely_spilled_p (reg_class_t rclass)
29113 if ((TARGET_THUMB1 && rclass == LO_REGS)
29114 || rclass == CC_REG)
29115 return true;
29117 return false;
29120 /* Implements target hook small_register_classes_for_mode_p. */
29121 bool
29122 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29124 return TARGET_THUMB1;
29127 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29128 ARM insns and therefore guarantee that the shift count is modulo 256.
29129 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29130 guarantee no particular behavior for out-of-range counts. */
29132 static unsigned HOST_WIDE_INT
29133 arm_shift_truncation_mask (machine_mode mode)
29135 return mode == SImode ? 255 : 0;
29139 /* Map internal gcc register numbers to DWARF2 register numbers. */
29141 unsigned int
29142 arm_dbx_register_number (unsigned int regno)
29144 if (regno < 16)
29145 return regno;
29147 if (IS_VFP_REGNUM (regno))
29149 /* See comment in arm_dwarf_register_span. */
29150 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29151 return 64 + regno - FIRST_VFP_REGNUM;
29152 else
29153 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29156 if (IS_IWMMXT_GR_REGNUM (regno))
29157 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29159 if (IS_IWMMXT_REGNUM (regno))
29160 return 112 + regno - FIRST_IWMMXT_REGNUM;
29162 gcc_unreachable ();
29165 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29166 GCC models them as 64 32-bit registers, so we need to describe this to
29167 the DWARF generation code. Other registers can use the default. */
29168 static rtx
29169 arm_dwarf_register_span (rtx rtl)
29171 machine_mode mode;
29172 unsigned regno;
29173 rtx parts[16];
29174 int nregs;
29175 int i;
29177 regno = REGNO (rtl);
29178 if (!IS_VFP_REGNUM (regno))
29179 return NULL_RTX;
29181 /* XXX FIXME: The EABI defines two VFP register ranges:
29182 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29183 256-287: D0-D31
29184 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29185 corresponding D register. Until GDB supports this, we shall use the
29186 legacy encodings. We also use these encodings for D0-D15 for
29187 compatibility with older debuggers. */
29188 mode = GET_MODE (rtl);
29189 if (GET_MODE_SIZE (mode) < 8)
29190 return NULL_RTX;
29192 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29194 nregs = GET_MODE_SIZE (mode) / 4;
29195 for (i = 0; i < nregs; i += 2)
29196 if (TARGET_BIG_END)
29198 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29199 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29201 else
29203 parts[i] = gen_rtx_REG (SImode, regno + i);
29204 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29207 else
29209 nregs = GET_MODE_SIZE (mode) / 8;
29210 for (i = 0; i < nregs; i++)
29211 parts[i] = gen_rtx_REG (DImode, regno + i);
29214 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29217 #if ARM_UNWIND_INFO
29218 /* Emit unwind directives for a store-multiple instruction or stack pointer
29219 push during alignment.
29220 These should only ever be generated by the function prologue code, so
29221 expect them to have a particular form.
29222 The store-multiple instruction sometimes pushes pc as the last register,
29223 although it should not be tracked in the unwind information; for -Os it
29224 sometimes pushes dummy registers before the first register that needs
29225 to be tracked in the unwind information.  Such dummy registers are there
29226 just to avoid a separate stack adjustment, and will not be restored in
29227 the epilogue.  */
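/* For example, a prologue "push {r4, r5, lr}" is annotated as
".save {r4, r5, lr}", a VFP store-multiple of d8-d9 as ".vsave {d8, d9}",
and a trailing pc push or leading dummy push becomes a ".pad" directive.  */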
29229 static void
29230 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29232 int i;
29233 HOST_WIDE_INT offset;
29234 HOST_WIDE_INT nregs;
29235 int reg_size;
29236 unsigned reg;
29237 unsigned lastreg;
29238 unsigned padfirst = 0, padlast = 0;
29239 rtx e;
29241 e = XVECEXP (p, 0, 0);
29242 gcc_assert (GET_CODE (e) == SET);
29244 /* First insn will adjust the stack pointer. */
29245 gcc_assert (GET_CODE (e) == SET
29246 && REG_P (SET_DEST (e))
29247 && REGNO (SET_DEST (e)) == SP_REGNUM
29248 && GET_CODE (SET_SRC (e)) == PLUS);
29250 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29251 nregs = XVECLEN (p, 0) - 1;
29252 gcc_assert (nregs);
29254 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29255 if (reg < 16)
29257 /* For -Os dummy registers can be pushed at the beginning to
29258 avoid separate stack pointer adjustment. */
29259 e = XVECEXP (p, 0, 1);
29260 e = XEXP (SET_DEST (e), 0);
29261 if (GET_CODE (e) == PLUS)
29262 padfirst = INTVAL (XEXP (e, 1));
29263 gcc_assert (padfirst == 0 || optimize_size);
29264 /* The function prologue may also push pc, but not annotate it as it is
29265 never restored. We turn this into a stack pointer adjustment. */
29266 e = XVECEXP (p, 0, nregs);
29267 e = XEXP (SET_DEST (e), 0);
29268 if (GET_CODE (e) == PLUS)
29269 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29270 else
29271 padlast = offset - 4;
29272 gcc_assert (padlast == 0 || padlast == 4);
29273 if (padlast == 4)
29274 fprintf (asm_out_file, "\t.pad #4\n");
29275 reg_size = 4;
29276 fprintf (asm_out_file, "\t.save {");
29278 else if (IS_VFP_REGNUM (reg))
29280 reg_size = 8;
29281 fprintf (asm_out_file, "\t.vsave {");
29283 else
29284 /* Unknown register type. */
29285 gcc_unreachable ();
29287 /* If the stack increment doesn't match the size of the saved registers,
29288 something has gone horribly wrong. */
29289 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29291 offset = padfirst;
29292 lastreg = 0;
29293 /* The remaining insns will describe the stores. */
29294 for (i = 1; i <= nregs; i++)
29296 /* Expect (set (mem <addr>) (reg)).
29297 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29298 e = XVECEXP (p, 0, i);
29299 gcc_assert (GET_CODE (e) == SET
29300 && MEM_P (SET_DEST (e))
29301 && REG_P (SET_SRC (e)));
29303 reg = REGNO (SET_SRC (e));
29304 gcc_assert (reg >= lastreg);
29306 if (i != 1)
29307 fprintf (asm_out_file, ", ");
29308 /* We can't use %r for vfp because we need to use the
29309 double precision register names. */
29310 if (IS_VFP_REGNUM (reg))
29311 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29312 else
29313 asm_fprintf (asm_out_file, "%r", reg);
29315 #ifdef ENABLE_CHECKING
29316 /* Check that the addresses are consecutive. */
29317 e = XEXP (SET_DEST (e), 0);
29318 if (GET_CODE (e) == PLUS)
29319 gcc_assert (REG_P (XEXP (e, 0))
29320 && REGNO (XEXP (e, 0)) == SP_REGNUM
29321 && CONST_INT_P (XEXP (e, 1))
29322 && offset == INTVAL (XEXP (e, 1)));
29323 else
29324 gcc_assert (i == 1
29325 && REG_P (e)
29326 && REGNO (e) == SP_REGNUM);
29327 offset += reg_size;
29328 #endif
29330 fprintf (asm_out_file, "}\n");
29331 if (padfirst)
29332 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29335 /* Emit unwind directives for a SET. */
29337 static void
29338 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29340 rtx e0;
29341 rtx e1;
29342 unsigned reg;
29344 e0 = XEXP (p, 0);
29345 e1 = XEXP (p, 1);
29346 switch (GET_CODE (e0))
29348 case MEM:
29349 /* Pushing a single register. */
29350 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29351 || !REG_P (XEXP (XEXP (e0, 0), 0))
29352 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29353 abort ();
29355 asm_fprintf (asm_out_file, "\t.save ");
29356 if (IS_VFP_REGNUM (REGNO (e1)))
29357 asm_fprintf(asm_out_file, "{d%d}\n",
29358 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29359 else
29360 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29361 break;
29363 case REG:
29364 if (REGNO (e0) == SP_REGNUM)
29366 /* A stack increment. */
29367 if (GET_CODE (e1) != PLUS
29368 || !REG_P (XEXP (e1, 0))
29369 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29370 || !CONST_INT_P (XEXP (e1, 1)))
29371 abort ();
29373 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29374 -INTVAL (XEXP (e1, 1)));
29376 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29378 HOST_WIDE_INT offset;
29380 if (GET_CODE (e1) == PLUS)
29382 if (!REG_P (XEXP (e1, 0))
29383 || !CONST_INT_P (XEXP (e1, 1)))
29384 abort ();
29385 reg = REGNO (XEXP (e1, 0));
29386 offset = INTVAL (XEXP (e1, 1));
29387 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29388 HARD_FRAME_POINTER_REGNUM, reg,
29389 offset);
29391 else if (REG_P (e1))
29393 reg = REGNO (e1);
29394 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29395 HARD_FRAME_POINTER_REGNUM, reg);
29397 else
29398 abort ();
29400 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29402 /* Move from sp to reg. */
29403 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29405 else if (GET_CODE (e1) == PLUS
29406 && REG_P (XEXP (e1, 0))
29407 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29408 && CONST_INT_P (XEXP (e1, 1)))
29410 /* Set reg to offset from sp. */
29411 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29412 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29414 else
29415 abort ();
29416 break;
29418 default:
29419 abort ();
29424 /* Emit unwind directives for the given insn. */
29426 static void
29427 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29429 rtx note, pat;
29430 bool handled_one = false;
29432 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29433 return;
29435 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29436 && (TREE_NOTHROW (current_function_decl)
29437 || crtl->all_throwers_are_sibcalls))
29438 return;
29440 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29441 return;
29443 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29445 switch (REG_NOTE_KIND (note))
29447 case REG_FRAME_RELATED_EXPR:
29448 pat = XEXP (note, 0);
29449 goto found;
29451 case REG_CFA_REGISTER:
29452 pat = XEXP (note, 0);
29453 if (pat == NULL)
29455 pat = PATTERN (insn);
29456 if (GET_CODE (pat) == PARALLEL)
29457 pat = XVECEXP (pat, 0, 0);
29460 /* Only emitted for IS_STACKALIGN re-alignment. */
29462 rtx dest, src;
29463 unsigned reg;
29465 src = SET_SRC (pat);
29466 dest = SET_DEST (pat);
29468 gcc_assert (src == stack_pointer_rtx);
29469 reg = REGNO (dest);
29470 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29471 reg + 0x90, reg);
29473 handled_one = true;
29474 break;
29476 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
29477 to get correct dwarf information for shrink-wrapping.  We should not
29478 emit unwind information for it because these notes are used either for
29479 pretend arguments or to adjust sp and restore registers from the
29480 stack.  */
29481 case REG_CFA_DEF_CFA:
29482 case REG_CFA_ADJUST_CFA:
29483 case REG_CFA_RESTORE:
29484 return;
29486 case REG_CFA_EXPRESSION:
29487 case REG_CFA_OFFSET:
29488 /* ??? Only handling here what we actually emit. */
29489 gcc_unreachable ();
29491 default:
29492 break;
29495 if (handled_one)
29496 return;
29497 pat = PATTERN (insn);
29498 found:
29500 switch (GET_CODE (pat))
29502 case SET:
29503 arm_unwind_emit_set (asm_out_file, pat);
29504 break;
29506 case SEQUENCE:
29507 /* Store multiple. */
29508 arm_unwind_emit_sequence (asm_out_file, pat);
29509 break;
29511 default:
29512 abort();
29517 /* Output a reference from a function exception table to the type_info
29518 object X. The EABI specifies that the symbol should be relocated by
29519 an R_ARM_TARGET2 relocation. */
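/* So a typeinfo reference is emitted as, e.g., ".word _ZTIi(TARGET2)", while
an integer constant is emitted without the relocation suffix.  */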
29521 static bool
29522 arm_output_ttype (rtx x)
29524 fputs ("\t.word\t", asm_out_file);
29525 output_addr_const (asm_out_file, x);
29526 /* Use special relocations for symbol references. */
29527 if (!CONST_INT_P (x))
29528 fputs ("(TARGET2)", asm_out_file);
29529 fputc ('\n', asm_out_file);
29531 return TRUE;
29534 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29536 static void
29537 arm_asm_emit_except_personality (rtx personality)
29539 fputs ("\t.personality\t", asm_out_file);
29540 output_addr_const (asm_out_file, personality);
29541 fputc ('\n', asm_out_file);
29544 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29546 static void
29547 arm_asm_init_sections (void)
29549 exception_section = get_unnamed_section (0, output_section_asm_op,
29550 "\t.handlerdata");
29552 #endif /* ARM_UNWIND_INFO */
29554 /* Output unwind directives for the start/end of a function. */
29556 void
29557 arm_output_fn_unwind (FILE * f, bool prologue)
29559 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29560 return;
29562 if (prologue)
29563 fputs ("\t.fnstart\n", f);
29564 else
29566 /* If this function will never be unwound, then mark it as such.
29567 The same condition is used in arm_unwind_emit to suppress
29568 the frame annotations. */
29569 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29570 && (TREE_NOTHROW (current_function_decl)
29571 || crtl->all_throwers_are_sibcalls))
29572 fputs("\t.cantunwind\n", f);
29574 fputs ("\t.fnend\n", f);
29578 static bool
29579 arm_emit_tls_decoration (FILE *fp, rtx x)
29581 enum tls_reloc reloc;
29582 rtx val;
29584 val = XVECEXP (x, 0, 0);
29585 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29587 output_addr_const (fp, val);
29589 switch (reloc)
29591 case TLS_GD32:
29592 fputs ("(tlsgd)", fp);
29593 break;
29594 case TLS_LDM32:
29595 fputs ("(tlsldm)", fp);
29596 break;
29597 case TLS_LDO32:
29598 fputs ("(tlsldo)", fp);
29599 break;
29600 case TLS_IE32:
29601 fputs ("(gottpoff)", fp);
29602 break;
29603 case TLS_LE32:
29604 fputs ("(tpoff)", fp);
29605 break;
29606 case TLS_DESCSEQ:
29607 fputs ("(tlsdesc)", fp);
29608 break;
29609 default:
29610 gcc_unreachable ();
29613 switch (reloc)
29615 case TLS_GD32:
29616 case TLS_LDM32:
29617 case TLS_IE32:
29618 case TLS_DESCSEQ:
29619 fputs (" + (. - ", fp);
29620 output_addr_const (fp, XVECEXP (x, 0, 2));
29621 /* For DESCSEQ the 3rd operand encodes Thumb-ness, and is added.  */
29622 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29623 output_addr_const (fp, XVECEXP (x, 0, 3));
29624 fputc (')', fp);
29625 break;
29626 default:
29627 break;
29630 return TRUE;
29633 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29635 static void
29636 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29638 gcc_assert (size == 4);
29639 fputs ("\t.word\t", file);
29640 output_addr_const (file, x);
29641 fputs ("(tlsldo)", file);
29644 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29646 static bool
29647 arm_output_addr_const_extra (FILE *fp, rtx x)
29649 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29650 return arm_emit_tls_decoration (fp, x);
29651 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29653 char label[256];
29654 int labelno = INTVAL (XVECEXP (x, 0, 0));
29656 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29657 assemble_name_raw (fp, label);
29659 return TRUE;
29661 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29663 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29664 if (GOT_PCREL)
29665 fputs ("+.", fp);
29666 fputs ("-(", fp);
29667 output_addr_const (fp, XVECEXP (x, 0, 0));
29668 fputc (')', fp);
29669 return TRUE;
29671 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29673 output_addr_const (fp, XVECEXP (x, 0, 0));
29674 if (GOT_PCREL)
29675 fputs ("+.", fp);
29676 fputs ("-(", fp);
29677 output_addr_const (fp, XVECEXP (x, 0, 1));
29678 fputc (')', fp);
29679 return TRUE;
29681 else if (GET_CODE (x) == CONST_VECTOR)
29682 return arm_emit_vector_const (fp, x);
29684 return FALSE;
29687 /* Output assembly for a shift instruction.
29688 SET_FLAGS determines how the instruction modifies the condition codes.
29689 0 - Do not set condition codes.
29690 1 - Set condition codes.
29691 2 - Use smallest instruction. */
29692 const char *
29693 arm_output_shift(rtx * operands, int set_flags)
29695 char pattern[100];
29696 static const char flag_chars[3] = {'?', '.', '!'};
29697 const char *shift;
29698 HOST_WIDE_INT val;
29699 char c;
29701 c = flag_chars[set_flags];
29702 if (TARGET_UNIFIED_ASM)
29704 shift = shift_op(operands[3], &val);
29705 if (shift)
29707 if (val != -1)
29708 operands[2] = GEN_INT(val);
29709 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29711 else
29712 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29714 else
29715 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29716 output_asm_insn (pattern, operands);
29717 return "";
29720 /* Output assembly for a WMMX immediate shift instruction. */
29721 const char *
29722 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29724 int shift = INTVAL (operands[2]);
29725 char templ[50];
29726 machine_mode opmode = GET_MODE (operands[0]);
29728 gcc_assert (shift >= 0);
29730 /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
29731 31 (for the W qualifier) or 15 (for the H qualifier).  */
29732 if (((opmode == V4HImode) && (shift > 15))
29733 || ((opmode == V2SImode) && (shift > 31))
29734 || ((opmode == DImode) && (shift > 63)))
29736 if (wror_or_wsra)
29738 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29739 output_asm_insn (templ, operands);
29740 if (opmode == DImode)
29742 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29743 output_asm_insn (templ, operands);
29746 else
29748 /* The destination register will contain all zeros. */
29749 sprintf (templ, "wzero\t%%0");
29750 output_asm_insn (templ, operands);
29752 return "";
29755 if ((opmode == DImode) && (shift > 32))
29757 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29758 output_asm_insn (templ, operands);
29759 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29760 output_asm_insn (templ, operands);
29762 else
29764 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29765 output_asm_insn (templ, operands);
29767 return "";
29770 /* Output assembly for a WMMX tinsr instruction. */
29771 const char *
29772 arm_output_iwmmxt_tinsr (rtx *operands)
29774 int mask = INTVAL (operands[3]);
29775 int i;
29776 char templ[50];
29777 int units = mode_nunits[GET_MODE (operands[0])];
29778 gcc_assert ((mask & (mask - 1)) == 0);
29779 for (i = 0; i < units; ++i)
29781 if ((mask & 0x01) == 1)
29783 break;
29785 mask >>= 1;
29787 gcc_assert (i < units);
29789 switch (GET_MODE (operands[0]))
29791 case V8QImode:
29792 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29793 break;
29794 case V4HImode:
29795 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29796 break;
29797 case V2SImode:
29798 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29799 break;
29800 default:
29801 gcc_unreachable ();
29802 break;
29804 output_asm_insn (templ, operands);
29806 return "";
29809 /* Output a Thumb-1 casesi dispatch sequence. */
29810 const char *
29811 thumb1_output_casesi (rtx *operands)
29813 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29815 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29817 switch (GET_MODE(diff_vec))
29819 case QImode:
29820 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29821 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29822 case HImode:
29823 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29824 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29825 case SImode:
29826 return "bl\t%___gnu_thumb1_case_si";
29827 default:
29828 gcc_unreachable ();
29832 /* Output a Thumb-2 casesi instruction. */
29833 const char *
29834 thumb2_output_casesi (rtx *operands)
29836 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29838 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29840 output_asm_insn ("cmp\t%0, %1", operands);
29841 output_asm_insn ("bhi\t%l3", operands);
29842 switch (GET_MODE(diff_vec))
29844 case QImode:
29845 return "tbb\t[%|pc, %0]";
29846 case HImode:
29847 return "tbh\t[%|pc, %0, lsl #1]";
29848 case SImode:
29849 if (flag_pic)
29851 output_asm_insn ("adr\t%4, %l2", operands);
29852 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29853 output_asm_insn ("add\t%4, %4, %5", operands);
29854 return "bx\t%4";
29856 else
29858 output_asm_insn ("adr\t%4, %l2", operands);
29859 return "ldr\t%|pc, [%4, %0, lsl #2]";
29861 default:
29862 gcc_unreachable ();
29866 /* Most ARM cores are single issue, but some newer ones can issue multiple
29867 instructions per cycle.  The scheduler descriptions rely on this being correct.  */
29868 static int
29869 arm_issue_rate (void)
29871 switch (arm_tune)
29873 case cortexa15:
29874 case cortexa57:
29875 return 3;
29877 case cortexm7:
29878 case cortexr4:
29879 case cortexr4f:
29880 case cortexr5:
29881 case genericv7a:
29882 case cortexa5:
29883 case cortexa7:
29884 case cortexa8:
29885 case cortexa9:
29886 case cortexa12:
29887 case cortexa53:
29888 case fa726te:
29889 case marvell_pj4:
29890 return 2;
29892 default:
29893 return 1;
29897 /* A table and a function to perform ARM-specific name mangling for
29898 NEON vector types in order to conform to the AAPCS (see "Procedure
29899 Call Standard for the ARM Architecture", Appendix A). To qualify
29900 for emission with the mangled names defined in that document, a
29901 vector type must not only be of the correct mode but also be
29902 composed of NEON vector element types (e.g. __builtin_neon_qi). */
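/* For example, a 64-bit vector of __builtin_neon_qi elements (V8QImode, the
int8x8_t type) is mangled as "15__simd64_int8_t", per the first table entry
below.  */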
29903 typedef struct
29905 machine_mode mode;
29906 const char *element_type_name;
29907 const char *aapcs_name;
29908 } arm_mangle_map_entry;
29910 static arm_mangle_map_entry arm_mangle_map[] = {
29911 /* 64-bit containerized types. */
29912 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29913 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29914 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29915 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29916 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29917 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29918 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29919 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29920 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29921 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29923 /* 128-bit containerized types. */
29924 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29925 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29926 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29927 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29928 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29929 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29930 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29931 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29932 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29933 { VOIDmode, NULL, NULL }
29936 const char *
29937 arm_mangle_type (const_tree type)
29939 arm_mangle_map_entry *pos = arm_mangle_map;
29941 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29942 has to be mangled as if it is in the "std" namespace. */
29943 if (TARGET_AAPCS_BASED
29944 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29945 return "St9__va_list";
29947 /* Half-precision float. */
29948 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29949 return "Dh";
29951 if (TREE_CODE (type) != VECTOR_TYPE)
29952 return NULL;
29954 /* Check the mode of the vector type, and the name of the vector
29955 element type, against the table. */
29956 while (pos->mode != VOIDmode)
29958 tree elt_type = TREE_TYPE (type);
29960 if (pos->mode == TYPE_MODE (type)
29961 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29962 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29963 pos->element_type_name))
29964 return pos->aapcs_name;
29966 pos++;
29969 /* Use the default mangling for unrecognized (possibly user-defined)
29970 vector types. */
29971 return NULL;
29974 /* Order of allocation of core registers for Thumb: this allocation is
29975 written over the corresponding initial entries of the array
29976 initialized with REG_ALLOC_ORDER. We allocate all low registers
29977 first. Saving and restoring a low register is usually cheaper than
29978 using a call-clobbered high register. */
29980 static const int thumb_core_reg_alloc_order[] =
29982 3, 2, 1, 0, 4, 5, 6, 7,
29983 14, 12, 8, 9, 10, 11
29986 /* Adjust register allocation order when compiling for Thumb. */
29988 void
29989 arm_order_regs_for_local_alloc (void)
29991 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29992 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29993 if (TARGET_THUMB)
29994 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29995 sizeof (thumb_core_reg_alloc_order));
29998 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30000 bool
30001 arm_frame_pointer_required (void)
30003 return (cfun->has_nonlocal_label
30004 || SUBTARGET_FRAME_POINTER_REQUIRED
30005 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30008 /* Only Thumb-1 lacks conditional execution, so return true unless
30009 the target is Thumb-1.  */
30010 static bool
30011 arm_have_conditional_execution (void)
30013 return !TARGET_THUMB1;
30016 tree
30017 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30019 machine_mode in_mode, out_mode;
30020 int in_n, out_n;
30021 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30023 if (TREE_CODE (type_out) != VECTOR_TYPE
30024 || TREE_CODE (type_in) != VECTOR_TYPE)
30025 return NULL_TREE;
30027 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30028 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30029 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30030 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30032 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30033 decl of the vectorized builtin for the appropriate vector mode.
30034 NULL_TREE is returned if no such builtin is available. */
30035 #undef ARM_CHECK_BUILTIN_MODE
30036 #define ARM_CHECK_BUILTIN_MODE(C) \
30037 (TARGET_NEON && TARGET_FPU_ARMV8 \
30038 && flag_unsafe_math_optimizations \
30039 && ARM_CHECK_BUILTIN_MODE_1 (C))
30041 #undef ARM_CHECK_BUILTIN_MODE_1
30042 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30043 (out_mode == SFmode && out_n == C \
30044 && in_mode == SFmode && in_n == C)
30046 #undef ARM_FIND_VRINT_VARIANT
30047 #define ARM_FIND_VRINT_VARIANT(N) \
30048 (ARM_CHECK_BUILTIN_MODE (2) \
30049 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30050 : (ARM_CHECK_BUILTIN_MODE (4) \
30051 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30052 : NULL_TREE))
30054 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30056 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30057 switch (fn)
30059 case BUILT_IN_FLOORF:
30060 return ARM_FIND_VRINT_VARIANT (vrintm);
30061 case BUILT_IN_CEILF:
30062 return ARM_FIND_VRINT_VARIANT (vrintp);
30063 case BUILT_IN_TRUNCF:
30064 return ARM_FIND_VRINT_VARIANT (vrintz);
30065 case BUILT_IN_ROUNDF:
30066 return ARM_FIND_VRINT_VARIANT (vrinta);
30067 #undef ARM_CHECK_BUILTIN_MODE_1
30068 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30069 (out_mode == SImode && out_n == C \
30070 && in_mode == SFmode && in_n == C)
30072 #define ARM_FIND_VCVT_VARIANT(N) \
30073 (ARM_CHECK_BUILTIN_MODE (2) \
30074 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30075 : (ARM_CHECK_BUILTIN_MODE (4) \
30076 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30077 : NULL_TREE))
30079 #define ARM_FIND_VCVTU_VARIANT(N) \
30080 (ARM_CHECK_BUILTIN_MODE (2) \
30081 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30082 : (ARM_CHECK_BUILTIN_MODE (4) \
30083 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30084 : NULL_TREE))
30085 case BUILT_IN_LROUNDF:
30086 return out_unsigned_p
30087 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30088 : ARM_FIND_VCVT_VARIANT (vcvta);
30089 case BUILT_IN_LCEILF:
30090 return out_unsigned_p
30091 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30092 : ARM_FIND_VCVT_VARIANT (vcvtp);
30093 case BUILT_IN_LFLOORF:
30094 return out_unsigned_p
30095 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30096 : ARM_FIND_VCVT_VARIANT (vcvtm);
30097 #undef ARM_CHECK_BUILTIN_MODE
30098 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30099 (out_mode == N##mode && out_n == C \
30100 && in_mode == N##mode && in_n == C)
30101 case BUILT_IN_BSWAP16:
30102 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30103 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30104 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30105 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30106 else
30107 return NULL_TREE;
30108 case BUILT_IN_BSWAP32:
30109 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30110 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30111 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30112 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30113 else
30114 return NULL_TREE;
30115 case BUILT_IN_BSWAP64:
30116 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30117 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30118 else
30119 return NULL_TREE;
30120 case BUILT_IN_COPYSIGNF:
30121 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30122 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30123 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30124 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30125 else
30126 return NULL_TREE;
30128 default:
30129 return NULL_TREE;
30132 return NULL_TREE;
30134 #undef ARM_FIND_VCVT_VARIANT
30135 #undef ARM_FIND_VCVTU_VARIANT
30136 #undef ARM_CHECK_BUILTIN_MODE
30137 #undef ARM_FIND_VRINT_VARIANT
30140 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30141 static HOST_WIDE_INT
30142 arm_vector_alignment (const_tree type)
30144 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30146 if (TARGET_AAPCS_BASED)
30147 align = MIN (align, 64);
30149 return align;
30152 static unsigned int
30153 arm_autovectorize_vector_sizes (void)
30155 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30158 static bool
30159 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30161 /* Vectors which aren't in packed structures will not be less aligned than
30162 the natural alignment of their element type, so this is safe. */
30163 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30164 return !is_packed;
30166 return default_builtin_vector_alignment_reachable (type, is_packed);
30169 static bool
30170 arm_builtin_support_vector_misalignment (machine_mode mode,
30171 const_tree type, int misalignment,
30172 bool is_packed)
30174 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30176 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30178 if (is_packed)
30179 return align == 1;
30181 /* If the misalignment is unknown, we should be able to handle the access
30182 so long as it is not to a member of a packed data structure. */
30183 if (misalignment == -1)
30184 return true;
30186 /* Return true if the misalignment is a multiple of the natural alignment
30187 of the vector's element type. This is probably always going to be
30188 true in practice, since we've already established that this isn't a
30189 packed access. */
30190 return ((misalignment % align) == 0);
30193 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30194 is_packed);
30197 static void
30198 arm_conditional_register_usage (void)
30200 int regno;
30202 if (TARGET_THUMB1 && optimize_size)
30204 /* When optimizing for size on Thumb-1, it's better not
30205 to use the HI regs, because of the overhead of
30206 stacking them. */
30207 for (regno = FIRST_HI_REGNUM;
30208 regno <= LAST_HI_REGNUM; ++regno)
30209 fixed_regs[regno] = call_used_regs[regno] = 1;
30212 /* The link register can be clobbered by any branch insn,
30213 but we have no way to track that at present, so mark
30214 it as unavailable. */
30215 if (TARGET_THUMB1)
30216 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30218 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30220 /* VFPv3 registers are disabled when earlier VFP
30221 versions are selected due to the definition of
30222 LAST_VFP_REGNUM. */
30223 for (regno = FIRST_VFP_REGNUM;
30224 regno <= LAST_VFP_REGNUM; ++ regno)
30226 fixed_regs[regno] = 0;
30227 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30228 || regno >= FIRST_VFP_REGNUM + 32;
30232 if (TARGET_REALLY_IWMMXT)
30234 regno = FIRST_IWMMXT_GR_REGNUM;
30235 /* The 2002/10/09 revision of the XScale ABI has wCG0
30236 and wCG1 as call-preserved registers. The 2002/11/21
30237 revision changed this so that all wCG registers are
30238 scratch registers. */
30239 for (regno = FIRST_IWMMXT_GR_REGNUM;
30240 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30241 fixed_regs[regno] = 0;
30242 /* The XScale ABI has wR0 - wR9 as scratch registers,
30243 the rest as call-preserved registers. */
30244 for (regno = FIRST_IWMMXT_REGNUM;
30245 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30247 fixed_regs[regno] = 0;
30248 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30252 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30254 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30255 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30257 else if (TARGET_APCS_STACK)
30259 fixed_regs[10] = 1;
30260 call_used_regs[10] = 1;
30262 /* -mcaller-super-interworking reserves r11 for calls to
30263 _interwork_r11_call_via_rN(). Making the register global
30264 is an easy way of ensuring that it remains valid for all
30265 calls. */
30266 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30267 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30269 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30270 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30271 if (TARGET_CALLER_INTERWORKING)
30272 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30274 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30277 static reg_class_t
30278 arm_preferred_rename_class (reg_class_t rclass)
30280 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30281 using GENERAL_REGS.  During the register rename pass, we therefore prefer
30282 LO_REGS so that code size can be reduced.  */
30283 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30284 return LO_REGS;
30285 else
30286 return NO_REGS;
30289 /* Compute the attribute "length" of insn "*push_multi".
30290 So this function MUST be kept in sync with that insn pattern. */
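/* In summary: ARM-mode pushes always take 4 bytes and Thumb-1 pushes 2; a
Thumb-2 push takes 2 bytes unless the register list contains a high register
other than LR, which forces the 32-bit encoding (4 bytes).  */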
30292 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30294 int i, regno, hi_reg;
30295 int num_saves = XVECLEN (parallel_op, 0);
30297 /* ARM mode. */
30298 if (TARGET_ARM)
30299 return 4;
30300 /* Thumb1 mode. */
30301 if (TARGET_THUMB1)
30302 return 2;
30304 /* Thumb2 mode. */
30305 regno = REGNO (first_op);
30306 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30307 for (i = 1; i < num_saves && !hi_reg; i++)
30309 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30310 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30313 if (!hi_reg)
30314 return 2;
30315 return 4;
30318 /* Compute the number of instructions emitted by output_move_double. */
30320 arm_count_output_move_double_insns (rtx *operands)
30322 int count;
30323 rtx ops[2];
30324 /* output_move_double may modify the operands array, so call it
30325 here on a copy of the array. */
30326 ops[0] = operands[0];
30327 ops[1] = operands[1];
30328 output_move_double (ops, false, &count);
30329 return count;
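/* If OPERAND is a CONST_DOUBLE whose exact reciprocal is a power of two
representable in 32 bits, return the base-2 log of that reciprocal (i.e. the
number of fraction bits it denotes); otherwise return 0.  */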
30333 vfp3_const_double_for_fract_bits (rtx operand)
30335 REAL_VALUE_TYPE r0;
30337 if (!CONST_DOUBLE_P (operand))
30338 return 0;
30340 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30341 if (exact_real_inverse (DFmode, &r0))
30343 if (exact_real_truncate (DFmode, &r0))
30345 HOST_WIDE_INT value = real_to_integer (&r0);
30346 value = value & 0xffffffff;
30347 if ((value != 0) && ( (value & (value - 1)) == 0))
30348 return int_log2 (value);
30351 return 0;
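/* As above, but test OPERAND itself rather than its reciprocal: return the
base-2 log of the value if it is an exact power of two representable in
32 bits, otherwise 0.  */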
30355 vfp3_const_double_for_bits (rtx operand)
30357 REAL_VALUE_TYPE r0;
30359 if (!CONST_DOUBLE_P (operand))
30360 return 0;
30362 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30363 if (exact_real_truncate (DFmode, &r0))
30365 HOST_WIDE_INT value = real_to_integer (&r0);
30366 value = value & 0xffffffff;
30367 if ((value != 0) && ( (value & (value - 1)) == 0))
30368 return int_log2 (value);
30371 return 0;
30374 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30376 static void
30377 arm_pre_atomic_barrier (enum memmodel model)
30379 if (need_atomic_barrier_p (model, true))
30380 emit_insn (gen_memory_barrier ());
30383 static void
30384 arm_post_atomic_barrier (enum memmodel model)
30386 if (need_atomic_barrier_p (model, false))
30387 emit_insn (gen_memory_barrier ());
30390 /* Emit the load-exclusive and store-exclusive instructions.
30391 Use acquire and release versions if necessary. */
30393 static void
30394 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30396 rtx (*gen) (rtx, rtx);
30398 if (acq)
30400 switch (mode)
30402 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30403 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30404 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30405 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30406 default:
30407 gcc_unreachable ();
30410 else
30412 switch (mode)
30414 case QImode: gen = gen_arm_load_exclusiveqi; break;
30415 case HImode: gen = gen_arm_load_exclusivehi; break;
30416 case SImode: gen = gen_arm_load_exclusivesi; break;
30417 case DImode: gen = gen_arm_load_exclusivedi; break;
30418 default:
30419 gcc_unreachable ();
30423 emit_insn (gen (rval, mem));
30426 static void
30427 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30428 rtx mem, bool rel)
30430 rtx (*gen) (rtx, rtx, rtx);
30432 if (rel)
30434 switch (mode)
30436 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30437 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30438 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30439 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30440 default:
30441 gcc_unreachable ();
30444 else
30446 switch (mode)
30448 case QImode: gen = gen_arm_store_exclusiveqi; break;
30449 case HImode: gen = gen_arm_store_exclusivehi; break;
30450 case SImode: gen = gen_arm_store_exclusivesi; break;
30451 case DImode: gen = gen_arm_store_exclusivedi; break;
30452 default:
30453 gcc_unreachable ();
30457 emit_insn (gen (bval, rval, mem));
30460 /* Emit the jump described by INSN and mark it as very unlikely to be taken.  */
30462 static void
30463 emit_unlikely_jump (rtx insn)
30465 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30467 insn = emit_jump_insn (insn);
30468 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30471 /* Expand a compare and swap pattern. */
30473 void
30474 arm_expand_compare_and_swap (rtx operands[])
30476 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30477 machine_mode mode;
30478 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30480 bval = operands[0];
30481 rval = operands[1];
30482 mem = operands[2];
30483 oldval = operands[3];
30484 newval = operands[4];
30485 is_weak = operands[5];
30486 mod_s = operands[6];
30487 mod_f = operands[7];
30488 mode = GET_MODE (mem);
30490 /* Normally the succ memory model must be stronger than fail, but in the
30491 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30492 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30494 if (TARGET_HAVE_LDACQ
30495 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30496 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30497 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30499 switch (mode)
30501 case QImode:
30502 case HImode:
30503 /* For narrow modes, we're going to perform the comparison in SImode,
30504 so do the zero-extension now. */
30505 rval = gen_reg_rtx (SImode);
30506 oldval = convert_modes (SImode, mode, oldval, true);
30507 /* FALLTHRU */
30509 case SImode:
30510 /* Force the value into a register if needed. We waited until after
30511 the zero-extension above to do this properly. */
30512 if (!arm_add_operand (oldval, SImode))
30513 oldval = force_reg (SImode, oldval);
30514 break;
30516 case DImode:
30517 if (!cmpdi_operand (oldval, mode))
30518 oldval = force_reg (mode, oldval);
30519 break;
30521 default:
30522 gcc_unreachable ();
30525 switch (mode)
30527 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30528 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30529 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30530 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30531 default:
30532 gcc_unreachable ();
30535 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30537 if (mode == QImode || mode == HImode)
30538 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30540 /* In all cases, we arrange for success to be signaled by Z set.
30541 This arrangement allows for the boolean result to be used directly
30542 in a subsequent branch, post optimization. */
30543 x = gen_rtx_REG (CCmode, CC_REGNUM);
30544 x = gen_rtx_EQ (SImode, x, const0_rtx);
30545 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30548 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30549 another memory store between the load-exclusive and store-exclusive can
30550 reset the monitor from Exclusive to Open state. This means we must wait
30551 until after reload to split the pattern, lest we get a register spill in
30552 the middle of the atomic sequence. */
30554 void
30555 arm_split_compare_and_swap (rtx operands[])
30557 rtx rval, mem, oldval, newval, scratch;
30558 machine_mode mode;
30559 enum memmodel mod_s, mod_f;
30560 bool is_weak;
30561 rtx_code_label *label1, *label2;
30562 rtx x, cond;
30564 rval = operands[0];
30565 mem = operands[1];
30566 oldval = operands[2];
30567 newval = operands[3];
30568 is_weak = (operands[4] != const0_rtx);
30569 mod_s = (enum memmodel) INTVAL (operands[5]);
30570 mod_f = (enum memmodel) INTVAL (operands[6]);
30571 scratch = operands[7];
30572 mode = GET_MODE (mem);
30574 bool use_acquire = TARGET_HAVE_LDACQ
30575 && !(mod_s == MEMMODEL_RELAXED
30576 || mod_s == MEMMODEL_CONSUME
30577 || mod_s == MEMMODEL_RELEASE);
30579 bool use_release = TARGET_HAVE_LDACQ
30580 && !(mod_s == MEMMODEL_RELAXED
30581 || mod_s == MEMMODEL_CONSUME
30582 || mod_s == MEMMODEL_ACQUIRE);
30584 /* Checks whether a barrier is needed and emits one accordingly. */
30585 if (!(use_acquire || use_release))
30586 arm_pre_atomic_barrier (mod_s);
30588 label1 = NULL;
30589 if (!is_weak)
30591 label1 = gen_label_rtx ();
30592 emit_label (label1);
30594 label2 = gen_label_rtx ();
30596 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30598 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30599 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30600 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30601 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30602 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30604 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30606 /* Weak or strong, we want EQ to be true for success, so that we
30607 match the flags that we got from the compare above. */
30608 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30609 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30610 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30612 if (!is_weak)
30614 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30615 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30616 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30617 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30620 if (mod_f != MEMMODEL_RELAXED)
30621 emit_label (label2);
30623 /* Checks whether a barrier is needed and emits one accordingly. */
30624 if (!(use_acquire || use_release))
30625 arm_post_atomic_barrier (mod_s);
30627 if (mod_f == MEMMODEL_RELAXED)
30628 emit_label (label2);
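/* Split an atomic read-modify-write into a load-exclusive / operate /
store-exclusive retry loop.  CODE is the operation to apply, OLD_OUT and
NEW_OUT (either may be null) receive the old and new values of MEM, VALUE is
the other operand, MODEL_RTX holds the memory model and COND is a scratch
register that receives the store-exclusive status.  */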
30631 void
30632 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30633 rtx value, rtx model_rtx, rtx cond)
30635 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30636 machine_mode mode = GET_MODE (mem);
30637 machine_mode wmode = (mode == DImode ? DImode : SImode);
30638 rtx_code_label *label;
30639 rtx x;
30641 bool use_acquire = TARGET_HAVE_LDACQ
30642 && !(model == MEMMODEL_RELAXED
30643 || model == MEMMODEL_CONSUME
30644 || model == MEMMODEL_RELEASE);
30646 bool use_release = TARGET_HAVE_LDACQ
30647 && !(model == MEMMODEL_RELAXED
30648 || model == MEMMODEL_CONSUME
30649 || model == MEMMODEL_ACQUIRE);
30651 /* Checks whether a barrier is needed and emits one accordingly. */
30652 if (!(use_acquire || use_release))
30653 arm_pre_atomic_barrier (model);
30655 label = gen_label_rtx ();
30656 emit_label (label);
30658 if (new_out)
30659 new_out = gen_lowpart (wmode, new_out);
30660 if (old_out)
30661 old_out = gen_lowpart (wmode, old_out);
30662 else
30663 old_out = new_out;
30664 value = simplify_gen_subreg (wmode, value, mode, 0);
30666 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30668 switch (code)
30670 case SET:
30671 new_out = value;
30672 break;
30674 case NOT:
30675 x = gen_rtx_AND (wmode, old_out, value);
30676 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30677 x = gen_rtx_NOT (wmode, new_out);
30678 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30679 break;
30681 case MINUS:
30682 if (CONST_INT_P (value))
30684 value = GEN_INT (-INTVAL (value));
30685 code = PLUS;
30687 /* FALLTHRU */
30689 case PLUS:
30690 if (mode == DImode)
30692 /* DImode plus/minus need to clobber flags. */
30693 /* The adddi3 and subdi3 patterns are incorrectly written so that
30694 they require matching operands, even when we could easily support
30695 three operands. Thankfully, this can be fixed up post-splitting,
30696 as the individual add+adc patterns do accept three operands and
30697 post-reload cprop can make these moves go away. */
30698 emit_move_insn (new_out, old_out);
30699 if (code == PLUS)
30700 x = gen_adddi3 (new_out, new_out, value);
30701 else
30702 x = gen_subdi3 (new_out, new_out, value);
30703 emit_insn (x);
30704 break;
30706 /* FALLTHRU */
30708 default:
30709 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30710 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30711 break;
30714 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30715 use_release);
30717 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30718 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30720 /* Checks whether a barrier is needed and emits one accordingly. */
30721 if (!(use_acquire || use_release))
30722 arm_post_atomic_barrier (model);
30725 #define MAX_VECT_LEN 16
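/* Describes one vector permutation request: TARGET receives the result of
permuting OP0/OP1 according to the NELT indices in PERM; ONE_VECTOR_P is set
when both inputs are the same value, and TESTING_P when we only need to know
whether the permutation can be expanded rather than actually emit it.  */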
30727 struct expand_vec_perm_d
30729 rtx target, op0, op1;
30730 unsigned char perm[MAX_VECT_LEN];
30731 machine_mode vmode;
30732 unsigned char nelt;
30733 bool one_vector_p;
30734 bool testing_p;
30737 /* Generate a variable permutation. */
30739 static void
30740 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30742 machine_mode vmode = GET_MODE (target);
30743 bool one_vector_p = rtx_equal_p (op0, op1);
30745 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30746 gcc_checking_assert (GET_MODE (op0) == vmode);
30747 gcc_checking_assert (GET_MODE (op1) == vmode);
30748 gcc_checking_assert (GET_MODE (sel) == vmode);
30749 gcc_checking_assert (TARGET_NEON);
30751 if (one_vector_p)
30753 if (vmode == V8QImode)
30754 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30755 else
30756 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30758 else
30760 rtx pair;
30762 if (vmode == V8QImode)
30764 pair = gen_reg_rtx (V16QImode);
30765 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30766 pair = gen_lowpart (TImode, pair);
30767 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30769 else
30771 pair = gen_reg_rtx (OImode);
30772 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30773 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
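/* Expand a variable permutation of OP0 and OP1 into TARGET under the selector
SEL.  The selector is masked into range first, since VTBL does not reduce
out-of-range indices modulo the number of elements.  */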
30778 void
30779 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30781 machine_mode vmode = GET_MODE (target);
30782 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30783 bool one_vector_p = rtx_equal_p (op0, op1);
30784 rtx rmask[MAX_VECT_LEN], mask;
30786 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30787 numbering of elements for big-endian, we must reverse the order. */
30788 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30790 /* The VTBL instruction does not use a modulo index, so we must take care
30791 of that ourselves. */
30792 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30793 for (i = 0; i < nelt; ++i)
30794 rmask[i] = mask;
30795 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30796 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30798 arm_expand_vec_perm_1 (target, op0, op1, sel);
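/* Illustration (editorial addition, not part of the original sources): the
   AND with NELT-1 (or 2*NELT-1 for a two-operand permute) above is what gives
   the VTBL expansion the modulo-index semantics VEC_PERM_EXPR requires;
   out-of-range selector bytes would otherwise make VTBL produce zero for that
   lane.  A minimal host-side sketch of the masking, with hypothetical names.  */

static void
model_vec_perm_index_mask (unsigned char *sel, unsigned int nelt,
                           int one_vector_p)
{
  /* One input vector: indices wrap at NELT; two inputs: at 2*NELT.  */
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  unsigned int i;

  for (i = 0; i < nelt; i++)
    sel[i] &= mask;   /* E.g. index 9 becomes 1 for a one-vector V8QI permute.  */
}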
30801 /* Generate or test for an insn that supports a constant permutation. */
30803 /* Recognize patterns for the VUZP insns. */
30805 static bool
30806 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30808 unsigned int i, odd, mask, nelt = d->nelt;
30809 rtx out0, out1, in0, in1, x;
30810 rtx (*gen)(rtx, rtx, rtx, rtx);
30812 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30813 return false;
30815 /* Note that these are little-endian tests. Adjust for big-endian later. */
30816 if (d->perm[0] == 0)
30817 odd = 0;
30818 else if (d->perm[0] == 1)
30819 odd = 1;
30820 else
30821 return false;
30822 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30824 for (i = 0; i < nelt; i++)
30826 unsigned elt = (i * 2 + odd) & mask;
30827 if (d->perm[i] != elt)
30828 return false;
30831 /* Success! */
30832 if (d->testing_p)
30833 return true;
30835 switch (d->vmode)
30837 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30838 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30839 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30840 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30841 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30842 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30843 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30844 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30845 default:
30846 gcc_unreachable ();
30849 in0 = d->op0;
30850 in1 = d->op1;
30851 if (BYTES_BIG_ENDIAN)
30853 x = in0, in0 = in1, in1 = x;
30854 odd = !odd;
30857 out0 = d->target;
30858 out1 = gen_reg_rtx (d->vmode);
30859 if (odd)
30860 x = out0, out0 = out1, out1 = x;
30862 emit_insn (gen (out0, in0, in1, out1));
30863 return true;
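/* Illustration (editorial addition, not part of the original sources): the
   check above accepts selectors that pick every second element, e.g. for a
   two-operand V8QImode permute {0,2,4,6,8,10,12,14} (odd == 0) or
   {1,3,5,7,9,11,13,15} (odd == 1).  A host-side mirror of that test, with
   hypothetical names.  */

static int
model_is_vuzp_selector (const unsigned char *perm, unsigned int nelt,
                        int one_vector_p)
{
  unsigned int i, odd, mask;

  if (perm[0] == 0)
    odd = 0;
  else if (perm[0] == 1)
    odd = 1;
  else
    return 0;

  mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    if (perm[i] != ((i * 2 + odd) & mask))
      return 0;

  return 1;
}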
30866 /* Recognize patterns for the VZIP insns. */
30868 static bool
30869 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30871 unsigned int i, high, mask, nelt = d->nelt;
30872 rtx out0, out1, in0, in1, x;
30873 rtx (*gen)(rtx, rtx, rtx, rtx);
30875 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30876 return false;
30878 /* Note that these are little-endian tests. Adjust for big-endian later. */
30879 high = nelt / 2;
30880 if (d->perm[0] == high)
30882 else if (d->perm[0] == 0)
30883 high = 0;
30884 else
30885 return false;
30886 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30888 for (i = 0; i < nelt / 2; i++)
30890 unsigned elt = (i + high) & mask;
30891 if (d->perm[i * 2] != elt)
30892 return false;
30893 elt = (elt + nelt) & mask;
30894 if (d->perm[i * 2 + 1] != elt)
30895 return false;
30898 /* Success! */
30899 if (d->testing_p)
30900 return true;
30902 switch (d->vmode)
30904 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30905 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30906 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30907 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30908 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30909 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30910 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30911 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30912 default:
30913 gcc_unreachable ();
30916 in0 = d->op0;
30917 in1 = d->op1;
30918 if (BYTES_BIG_ENDIAN)
30920 x = in0, in0 = in1, in1 = x;
30921 high = !high;
30924 out0 = d->target;
30925 out1 = gen_reg_rtx (d->vmode);
30926 if (high)
30927 x = out0, out0 = out1, out1 = x;
30929 emit_insn (gen (out0, in0, in1, out1));
30930 return true;
30933 /* Recognize patterns for the VREV insns. */
30935 static bool
30936 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30938 unsigned int i, j, diff, nelt = d->nelt;
30939 rtx (*gen)(rtx, rtx);
30941 if (!d->one_vector_p)
30942 return false;
30944 diff = d->perm[0];
30945 switch (diff)
30947 case 7:
30948 switch (d->vmode)
30950 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30951 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30952 default:
30953 return false;
30955 break;
30956 case 3:
30957 switch (d->vmode)
30959 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30960 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30961 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30962 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30963 default:
30964 return false;
30966 break;
30967 case 1:
30968 switch (d->vmode)
30970 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30971 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30972 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30973 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30974 case V4SImode: gen = gen_neon_vrev64v4si; break;
30975 case V2SImode: gen = gen_neon_vrev64v2si; break;
30976 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30977 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30978 default:
30979 return false;
30981 break;
30982 default:
30983 return false;
30986 for (i = 0; i < nelt ; i += diff + 1)
30987 for (j = 0; j <= diff; j += 1)
30989 /* This is guaranteed to be true as the value of diff
30990 is 7, 3, 1 and we should have enough elements in the
30991 queue to generate this. Getting a vector mask with a
30992 value of diff other than these values implies that
30993 something is wrong by the time we get here. */
30994 gcc_assert (i + j < nelt);
30995 if (d->perm[i + j] != i + diff - j)
30996 return false;
30999 /* Success! */
31000 if (d->testing_p)
31001 return true;
31003 emit_insn (gen (d->target, d->op0));
31004 return true;
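/* Illustration (editorial addition, not part of the original sources): DIFF
   encodes the size of the element group being reversed.  For V8QImode,
   perm[0] == 3 means "reverse within each 32-bit group", i.e. the selector
   {3,2,1,0, 7,6,5,4}, which the switch above maps to vrev32.8.  A host-side
   check of the group pattern, with hypothetical names.  */

static int
model_is_vrev_group (const unsigned char *perm, unsigned int nelt,
                     unsigned int diff)
{
  unsigned int i, j;

  /* Within each group of DIFF + 1 elements, indices must run backwards.  */
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j++)
      if (i + j >= nelt || perm[i + j] != i + diff - j)
        return 0;

  return 1;
}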
31007 /* Recognize patterns for the VTRN insns. */
31009 static bool
31010 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31012 unsigned int i, odd, mask, nelt = d->nelt;
31013 rtx out0, out1, in0, in1, x;
31014 rtx (*gen)(rtx, rtx, rtx, rtx);
31016 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31017 return false;
31019 /* Note that these are little-endian tests. Adjust for big-endian later. */
31020 if (d->perm[0] == 0)
31021 odd = 0;
31022 else if (d->perm[0] == 1)
31023 odd = 1;
31024 else
31025 return false;
31026 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31028 for (i = 0; i < nelt; i += 2)
31030 if (d->perm[i] != i + odd)
31031 return false;
31032 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31033 return false;
31036 /* Success! */
31037 if (d->testing_p)
31038 return true;
31040 switch (d->vmode)
31042 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31043 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31044 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31045 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31046 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31047 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31048 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31049 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31050 default:
31051 gcc_unreachable ();
31054 in0 = d->op0;
31055 in1 = d->op1;
31056 if (BYTES_BIG_ENDIAN)
31058 x = in0, in0 = in1, in1 = x;
31059 odd = !odd;
31062 out0 = d->target;
31063 out1 = gen_reg_rtx (d->vmode);
31064 if (odd)
31065 x = out0, out0 = out1, out1 = x;
31067 emit_insn (gen (out0, in0, in1, out1));
31068 return true;
31071 /* Recognize patterns for the VEXT insns. */
31073 static bool
31074 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31076 unsigned int i, nelt = d->nelt;
31077 rtx (*gen) (rtx, rtx, rtx, rtx);
31078 rtx offset;
31080 unsigned int location;
31082 unsigned int next = d->perm[0] + 1;
31084 /* TODO: Handle GCC's numbering of elements for big-endian. */
31085 if (BYTES_BIG_ENDIAN)
31086 return false;
31088 /* Check if the extracted indexes are increasing by one. */
31089 for (i = 1; i < nelt; next++, i++)
31091 /* If we hit the most significant element of the 2nd vector in
31092 the previous iteration, no need to test further. */
31093 if (next == 2 * nelt)
31094 return false;
31096 /* If we are operating on only one vector: it could be a
31097 rotation. If there are only two elements of size < 64, let
31098 arm_evpc_neon_vrev catch it. */
31099 if (d->one_vector_p && (next == nelt))
31101 if ((nelt == 2) && (d->vmode != V2DImode))
31102 return false;
31103 else
31104 next = 0;
31107 if (d->perm[i] != next)
31108 return false;
31111 location = d->perm[0];
31113 switch (d->vmode)
31115 case V16QImode: gen = gen_neon_vextv16qi; break;
31116 case V8QImode: gen = gen_neon_vextv8qi; break;
31117 case V4HImode: gen = gen_neon_vextv4hi; break;
31118 case V8HImode: gen = gen_neon_vextv8hi; break;
31119 case V2SImode: gen = gen_neon_vextv2si; break;
31120 case V4SImode: gen = gen_neon_vextv4si; break;
31121 case V2SFmode: gen = gen_neon_vextv2sf; break;
31122 case V4SFmode: gen = gen_neon_vextv4sf; break;
31123 case V2DImode: gen = gen_neon_vextv2di; break;
31124 default:
31125 return false;
31128 /* Success! */
31129 if (d->testing_p)
31130 return true;
31132 offset = GEN_INT (location);
31133 emit_insn (gen (d->target, d->op0, d->op1, offset));
31134 return true;
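/* Illustration (editorial addition, not part of the original sources): the
   loop above accepts selectors whose indices simply count upwards, which is
   what VEXT (extract a window straddling the two inputs) produces.  For a
   two-operand V8QImode permute, {3,4,5,6,7,8,9,10} is accepted with
   LOCATION == 3; for a single operand, {3,4,5,6,7,0,1,2} is the same window
   read as a rotation.  A simplified host-side mirror, with hypothetical
   names (the real check above additionally defers two-element rotations to
   the VREV path).  */

static int
model_is_vext_selector (const unsigned char *perm, unsigned int nelt,
                        int one_vector_p)
{
  unsigned int i, next = perm[0] + 1;

  for (i = 1; i < nelt; i++, next++)
    {
      if (next == 2 * nelt)
        return 0;
      if (one_vector_p && next == nelt)
        next = 0;               /* Wrap around: the permute is a rotation.  */
      if (perm[i] != next)
        return 0;
    }

  return 1;
}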
31137 /* The NEON VTBL instruction is a fully variable permutation that's even
31138 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31139 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31140 can do slightly better by expanding this as a constant where we don't
31141 have to apply a mask. */
31143 static bool
31144 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31146 rtx rperm[MAX_VECT_LEN], sel;
31147 machine_mode vmode = d->vmode;
31148 unsigned int i, nelt = d->nelt;
31150 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31151 numbering of elements for big-endian, we must reverse the order. */
31152 if (BYTES_BIG_ENDIAN)
31153 return false;
31155 if (d->testing_p)
31156 return true;
31158 /* Generic code will try constant permutation twice. Once with the
31159 original mode and again with the elements lowered to QImode.
31160 So wait and don't do the selector expansion ourselves. */
31161 if (vmode != V8QImode && vmode != V16QImode)
31162 return false;
31164 for (i = 0; i < nelt; ++i)
31165 rperm[i] = GEN_INT (d->perm[i]);
31166 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31167 sel = force_reg (vmode, sel);
31169 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31170 return true;
31173 static bool
31174 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31176 /* Check if the input mask matches vext before reordering the
31177 operands. */
31178 if (TARGET_NEON)
31179 if (arm_evpc_neon_vext (d))
31180 return true;
31182 /* The pattern matching functions above are written to look for a small
31183 number to begin the sequence (0, 1, N/2). If we begin with an index
31184 from the second operand, we can swap the operands. */
31185 if (d->perm[0] >= d->nelt)
31187 unsigned i, nelt = d->nelt;
31188 rtx x;
31190 for (i = 0; i < nelt; ++i)
31191 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31193 x = d->op0;
31194 d->op0 = d->op1;
31195 d->op1 = x;
31198 if (TARGET_NEON)
31200 if (arm_evpc_neon_vuzp (d))
31201 return true;
31202 if (arm_evpc_neon_vzip (d))
31203 return true;
31204 if (arm_evpc_neon_vrev (d))
31205 return true;
31206 if (arm_evpc_neon_vtrn (d))
31207 return true;
31208 return arm_evpc_neon_vtbl (d);
31210 return false;
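/* Illustration (editorial addition, not part of the original sources): the
   operand swap above canonicalizes selectors that start in the second input.
   For a two-operand V4SImode permute with selector {5,6,7,0}, adding NELT
   modulo 2*NELT gives {1,2,3,4} with OP0 and OP1 exchanged, so the
   recognizers, which only look for small starting indices, can match it
   (VEXT is tried before the swap, as the comment above notes).  A host-side
   sketch of the folding, with a hypothetical name.  */

static void
model_swap_perm_operands (unsigned char *perm, unsigned int nelt)
{
  unsigned int i;

  /* Adding NELT (mod 2*NELT) renumbers indices as if the inputs had been
     given in the opposite order; the caller must also swap OP0/OP1.  */
  for (i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
}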
31213 /* Expand a vec_perm_const pattern. */
31215 bool
31216 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31218 struct expand_vec_perm_d d;
31219 int i, nelt, which;
31221 d.target = target;
31222 d.op0 = op0;
31223 d.op1 = op1;
31225 d.vmode = GET_MODE (target);
31226 gcc_assert (VECTOR_MODE_P (d.vmode));
31227 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31228 d.testing_p = false;
31230 for (i = which = 0; i < nelt; ++i)
31232 rtx e = XVECEXP (sel, 0, i);
31233 int ei = INTVAL (e) & (2 * nelt - 1);
31234 which |= (ei < nelt ? 1 : 2);
31235 d.perm[i] = ei;
31238 switch (which)
31240 default:
31241 gcc_unreachable();
31243 case 3:
31244 d.one_vector_p = false;
31245 if (!rtx_equal_p (op0, op1))
31246 break;
31248 /* The elements of PERM do not suggest that only the first operand
31249 is used, but both operands are identical. Allow easier matching
31250 of the permutation by folding the permutation into the single
31251 input vector. */
31252 /* FALLTHRU */
31253 case 2:
31254 for (i = 0; i < nelt; ++i)
31255 d.perm[i] &= nelt - 1;
31256 d.op0 = op1;
31257 d.one_vector_p = true;
31258 break;
31260 case 1:
31261 d.op1 = op0;
31262 d.one_vector_p = true;
31263 break;
31266 return arm_expand_vec_perm_const_1 (&d);
31269 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31271 static bool
31272 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31273 const unsigned char *sel)
31275 struct expand_vec_perm_d d;
31276 unsigned int i, nelt, which;
31277 bool ret;
31279 d.vmode = vmode;
31280 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31281 d.testing_p = true;
31282 memcpy (d.perm, sel, nelt);
31284 /* Categorize the set of elements in the selector. */
31285 for (i = which = 0; i < nelt; ++i)
31287 unsigned char e = d.perm[i];
31288 gcc_assert (e < 2 * nelt);
31289 which |= (e < nelt ? 1 : 2);
31292 /* For all elements from second vector, fold the elements to first. */
31293 if (which == 2)
31294 for (i = 0; i < nelt; ++i)
31295 d.perm[i] -= nelt;
31297 /* Check whether the mask can be applied to the vector type. */
31298 d.one_vector_p = (which != 3);
31300 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31301 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31302 if (!d.one_vector_p)
31303 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31305 start_sequence ();
31306 ret = arm_expand_vec_perm_const_1 (&d);
31307 end_sequence ();
31309 return ret;
31312 bool
31313 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31315 /* If we are soft float and we do not have ldrd
31316 then all auto increment forms are ok. */
31317 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31318 return true;
31320 switch (code)
31322 /* Post increment and Pre Decrement are supported for all
31323 instruction forms except for vector forms. */
31324 case ARM_POST_INC:
31325 case ARM_PRE_DEC:
31326 if (VECTOR_MODE_P (mode))
31328 if (code != ARM_PRE_DEC)
31329 return true;
31330 else
31331 return false;
31334 return true;
31336 case ARM_POST_DEC:
31337 case ARM_PRE_INC:
31338 /* Without LDRD and mode size greater than
31339 word size, there is no point in auto-incrementing
31340 because ldm and stm will not have these forms. */
31341 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31342 return false;
31344 /* Vector and floating point modes do not support
31345 these auto increment forms. */
31346 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31347 return false;
31349 return true;
31351 default:
31352 return false;
31356 return false;
31359 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31360 on ARM, since we know that shifts by negative amounts are no-ops.
31361 Additionally, the default expansion code is not available or suitable
31362 for post-reload insn splits (this can occur when the register allocator
31363 chooses not to do a shift in NEON).
31365 This function is used in both initial expand and post-reload splits, and
31366 handles all kinds of 64-bit shifts.
31368 Input requirements:
31369 - It is safe for the input and output to be the same register, but
31370 early-clobber rules apply for the shift amount and scratch registers.
31371 - Shift by register requires both scratch registers. In all other cases
31372 the scratch registers may be NULL.
31373 - Ashiftrt by a register also clobbers the CC register. */
31374 void
31375 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31376 rtx amount, rtx scratch1, rtx scratch2)
31378 rtx out_high = gen_highpart (SImode, out);
31379 rtx out_low = gen_lowpart (SImode, out);
31380 rtx in_high = gen_highpart (SImode, in);
31381 rtx in_low = gen_lowpart (SImode, in);
31383 /* Terminology:
31384 in = the register pair containing the input value.
31385 out = the destination register pair.
31386 up = the high- or low-part of each pair.
31387 down = the opposite part to "up".
31388 In a shift, we can consider bits to shift from "up"-stream to
31389 "down"-stream, so in a left-shift "up" is the low-part and "down"
31390 is the high-part of each register pair. */
31392 rtx out_up = code == ASHIFT ? out_low : out_high;
31393 rtx out_down = code == ASHIFT ? out_high : out_low;
31394 rtx in_up = code == ASHIFT ? in_low : in_high;
31395 rtx in_down = code == ASHIFT ? in_high : in_low;
31397 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31398 gcc_assert (out
31399 && (REG_P (out) || GET_CODE (out) == SUBREG)
31400 && GET_MODE (out) == DImode);
31401 gcc_assert (in
31402 && (REG_P (in) || GET_CODE (in) == SUBREG)
31403 && GET_MODE (in) == DImode);
31404 gcc_assert (amount
31405 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31406 && GET_MODE (amount) == SImode)
31407 || CONST_INT_P (amount)));
31408 gcc_assert (scratch1 == NULL
31409 || (GET_CODE (scratch1) == SCRATCH)
31410 || (GET_MODE (scratch1) == SImode
31411 && REG_P (scratch1)));
31412 gcc_assert (scratch2 == NULL
31413 || (GET_CODE (scratch2) == SCRATCH)
31414 || (GET_MODE (scratch2) == SImode
31415 && REG_P (scratch2)));
31416 gcc_assert (!REG_P (out) || !REG_P (amount)
31417 || !HARD_REGISTER_P (out)
31418 || (REGNO (out) != REGNO (amount)
31419 && REGNO (out) + 1 != REGNO (amount)));
31421 /* Macros to make following code more readable. */
31422 #define SUB_32(DEST,SRC) \
31423 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31424 #define RSB_32(DEST,SRC) \
31425 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31426 #define SUB_S_32(DEST,SRC) \
31427 gen_addsi3_compare0 ((DEST), (SRC), \
31428 GEN_INT (-32))
31429 #define SET(DEST,SRC) \
31430 gen_rtx_SET (SImode, (DEST), (SRC))
31431 #define SHIFT(CODE,SRC,AMOUNT) \
31432 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31433 #define LSHIFT(CODE,SRC,AMOUNT) \
31434 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31435 SImode, (SRC), (AMOUNT))
31436 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31437 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31438 SImode, (SRC), (AMOUNT))
31439 #define ORR(A,B) \
31440 gen_rtx_IOR (SImode, (A), (B))
31441 #define BRANCH(COND,LABEL) \
31442 gen_arm_cond_branch ((LABEL), \
31443 gen_rtx_ ## COND (CCmode, cc_reg, \
31444 const0_rtx), \
31445 cc_reg)
31447 /* Shifts by register and shifts by constant are handled separately. */
31448 if (CONST_INT_P (amount))
31450 /* We have a shift-by-constant. */
31452 /* First, handle out-of-range shift amounts.
31453 In both cases we try to match the result an ARM instruction in a
31454 shift-by-register would give. This helps reduce execution
31455 differences between optimization levels, but it won't stop other
31456 parts of the compiler doing different things. This is "undefined"
31457 behaviour, in any case. */
31458 if (INTVAL (amount) <= 0)
31459 emit_insn (gen_movdi (out, in));
31460 else if (INTVAL (amount) >= 64)
31462 if (code == ASHIFTRT)
31464 rtx const31_rtx = GEN_INT (31);
31465 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31466 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31468 else
31469 emit_insn (gen_movdi (out, const0_rtx));
31472 /* Now handle valid shifts. */
31473 else if (INTVAL (amount) < 32)
31475 /* Shifts by a constant less than 32. */
31476 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31478 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31479 emit_insn (SET (out_down,
31480 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31481 out_down)));
31482 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31484 else
31486 /* Shifts by a constant greater than 31. */
31487 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31489 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31490 if (code == ASHIFTRT)
31491 emit_insn (gen_ashrsi3 (out_up, in_up,
31492 GEN_INT (31)));
31493 else
31494 emit_insn (SET (out_up, const0_rtx));
31497 else
31499 /* We have a shift-by-register. */
31500 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31502 /* This alternative requires the scratch registers. */
31503 gcc_assert (scratch1 && REG_P (scratch1));
31504 gcc_assert (scratch2 && REG_P (scratch2));
31506 /* We will need the values "amount-32" and "32-amount" later.
31507 Swapping them around now allows the later code to be more general. */
31508 switch (code)
31510 case ASHIFT:
31511 emit_insn (SUB_32 (scratch1, amount));
31512 emit_insn (RSB_32 (scratch2, amount));
31513 break;
31514 case ASHIFTRT:
31515 emit_insn (RSB_32 (scratch1, amount));
31516 /* Also set CC = amount > 32. */
31517 emit_insn (SUB_S_32 (scratch2, amount));
31518 break;
31519 case LSHIFTRT:
31520 emit_insn (RSB_32 (scratch1, amount));
31521 emit_insn (SUB_32 (scratch2, amount));
31522 break;
31523 default:
31524 gcc_unreachable ();
31527 /* Emit code like this:
31529 arithmetic-left:
31530 out_down = in_down << amount;
31531 out_down = (in_up << (amount - 32)) | out_down;
31532 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31533 out_up = in_up << amount;
31535 arithmetic-right:
31536 out_down = in_down >> amount;
31537 out_down = (in_up << (32 - amount)) | out_down;
31538 if (amount >= 32)
31539 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31540 out_up = in_up >> amount;
31542 logical-right:
31543 out_down = in_down >> amount;
31544 out_down = (in_up << (32 - amount)) | out_down;
31545 if (amount >= 32)
31546 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31547 out_up = in_up >> amount;
31549 The ARM and Thumb2 variants are the same but implemented slightly
31550 differently. If this were only called during expand we could just
31551 use the Thumb2 case and let combine do the right thing, but this
31552 can also be called from post-reload splitters. */
31554 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31556 if (!TARGET_THUMB2)
31558 /* Emit code for ARM mode. */
31559 emit_insn (SET (out_down,
31560 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31561 if (code == ASHIFTRT)
31563 rtx_code_label *done_label = gen_label_rtx ();
31564 emit_jump_insn (BRANCH (LT, done_label));
31565 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31566 out_down)));
31567 emit_label (done_label);
31569 else
31570 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31571 out_down)));
31573 else
31575 /* Emit code for Thumb2 mode.
31576 Thumb2 can't do a shift and an OR in one insn. */
31577 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31578 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31580 if (code == ASHIFTRT)
31582 rtx_code_label *done_label = gen_label_rtx ();
31583 emit_jump_insn (BRANCH (LT, done_label));
31584 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31585 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31586 emit_label (done_label);
31588 else
31590 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31591 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31595 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31598 #undef SUB_32
31599 #undef RSB_32
31600 #undef SUB_S_32
31601 #undef SET
31602 #undef SHIFT
31603 #undef LSHIFT
31604 #undef REV_LSHIFT
31605 #undef ORR
31606 #undef BRANCH
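/* Illustration (editorial addition, not part of the original sources): a
   host-side model of the logical-right case of the decomposition described
   above, operating on plain 64-bit values.  The helpers emulate ARM's
   register-specified shifts, where amounts of 32 or more produce zero for
   LSL/LSR; names are hypothetical and the model covers shift amounts 0-63
   only.  */

static unsigned int
model_arm_lsl (unsigned int x, unsigned int amount)
{
  return amount < 32 ? x << amount : 0;
}

static unsigned int
model_arm_lsr (unsigned int x, unsigned int amount)
{
  return amount < 32 ? x >> amount : 0;
}

static unsigned long long
model_lshiftrt_di (unsigned long long in, unsigned int amount)
{
  unsigned int in_low = (unsigned int) in;
  unsigned int in_high = (unsigned int) (in >> 32);
  unsigned int out_low, out_high;

  /* out_down is the low word shifted right, plus the bits that cross the
     word boundary from the high word; with the ARM shift semantics above,
     the third term only contributes when amount >= 32.  */
  out_low = model_arm_lsr (in_low, amount);
  out_low |= model_arm_lsl (in_high, 32 - amount);
  out_low |= model_arm_lsr (in_high, amount - 32);
  out_high = model_arm_lsr (in_high, amount);

  return ((unsigned long long) out_high << 32) | out_low;
}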
31610 /* Return TRUE if COMPARISON is a valid comparison operation, and force
31611 the operands into a form that is valid for it. */
31612 bool
31613 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31615 enum rtx_code code = GET_CODE (*comparison);
31616 int code_int;
31617 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31618 ? GET_MODE (*op2) : GET_MODE (*op1);
31620 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31622 if (code == UNEQ || code == LTGT)
31623 return false;
31625 code_int = (int)code;
31626 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31627 PUT_CODE (*comparison, (enum rtx_code)code_int);
31629 switch (mode)
31631 case SImode:
31632 if (!arm_add_operand (*op1, mode))
31633 *op1 = force_reg (mode, *op1);
31634 if (!arm_add_operand (*op2, mode))
31635 *op2 = force_reg (mode, *op2);
31636 return true;
31638 case DImode:
31639 if (!cmpdi_operand (*op1, mode))
31640 *op1 = force_reg (mode, *op1);
31641 if (!cmpdi_operand (*op2, mode))
31642 *op2 = force_reg (mode, *op2);
31643 return true;
31645 case SFmode:
31646 case DFmode:
31647 if (!arm_float_compare_operand (*op1, mode))
31648 *op1 = force_reg (mode, *op1);
31649 if (!arm_float_compare_operand (*op2, mode))
31650 *op2 = force_reg (mode, *op2);
31651 return true;
31652 default:
31653 break;
31656 return false;
31660 /* Maximum number of instructions to set block of memory. */
31661 static int
31662 arm_block_set_max_insns (void)
31664 if (optimize_function_for_size_p (cfun))
31665 return 4;
31666 else
31667 return current_tune->max_insns_inline_memset;
31670 /* Return TRUE if it's profitable to set block of memory for
31671 non-vectorized case. VAL is the value to set the memory
31672 with. LENGTH is the number of bytes to set. ALIGN is the
31673 alignment of the destination memory in bytes. UNALIGNED_P
31674 is TRUE if we can only set the memory with instructions
31675 meeting alignment requirements. USE_STRD_P is TRUE if we
31676 can use strd to set the memory. */
31677 static bool
31678 arm_block_set_non_vect_profit_p (rtx val,
31679 unsigned HOST_WIDE_INT length,
31680 unsigned HOST_WIDE_INT align,
31681 bool unaligned_p, bool use_strd_p)
31683 int num = 0;
31684 /* For a leftover of 0-7 bytes, we can set the memory block using
31685 strb/strh/str with the minimum number of instructions. */
31686 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31688 if (unaligned_p)
31690 num = arm_const_inline_cost (SET, val);
31691 num += length / align + length % align;
31693 else if (use_strd_p)
31695 num = arm_const_double_inline_cost (val);
31696 num += (length >> 3) + leftover[length & 7];
31698 else
31700 num = arm_const_inline_cost (SET, val);
31701 num += (length >> 2) + leftover[length & 3];
31704 /* We may be able to combine last pair STRH/STRB into a single STR
31705 by shifting one byte back. */
31706 if (unaligned_access && length > 3 && (length & 3) == 3)
31707 num--;
31709 return (num <= arm_block_set_max_insns ());
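/* Illustration (editorial addition, not part of the original sources): the
   word-aligned, non-strd branch above counts one store per word plus the
   LEFTOVER entry for the tail.  For example, LENGTH == 15 gives 3 str
   instructions plus leftover[3] == 2 (strh + strb), i.e. 5 stores, and the
   final decrement merges that strh/strb pair into one overlapping str when
   unaligned access is available.  A host-side sketch of the store count
   only (it ignores the cost of materializing the constant); names are
   hypothetical.  */

static int
model_non_vect_store_count (unsigned int length, int unaligned_access_ok)
{
  static const int leftover[4] = {0, 1, 1, 2};
  int num = (length >> 2) + leftover[length & 3];

  /* The last STRH/STRB pair can become a single STR shifted back a byte.  */
  if (unaligned_access_ok && length > 3 && (length & 3) == 3)
    num--;

  return num;
}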
31712 /* Return TRUE if it's profitable to set block of memory for
31713 vectorized case. LENGTH is the number of bytes to set.
31714 ALIGN is the alignment of destination memory in bytes.
31715 MODE is the vector mode used to set the memory. */
31716 static bool
31717 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31718 unsigned HOST_WIDE_INT align,
31719 machine_mode mode)
31721 int num;
31722 bool unaligned_p = ((align & 3) != 0);
31723 unsigned int nelt = GET_MODE_NUNITS (mode);
31725 /* Instruction loading constant value. */
31726 num = 1;
31727 /* Instructions storing the memory. */
31728 num += (length + nelt - 1) / nelt;
31729 /* Instructions adjusting the address expression. We only need to
31730 adjust the address expression if it's 4-byte aligned and the leftover
31731 bytes can only be stored by a misaligned store instruction. */
31732 if (!unaligned_p && (length & 3) != 0)
31733 num++;
31735 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31736 if (!unaligned_p && mode == V16QImode)
31737 num--;
31739 return (num <= arm_block_set_max_insns ());
31742 /* Set a block of memory using vectorization instructions for the
31743 unaligned case. We fill the first LENGTH bytes of the memory
31744 area starting from DSTBASE with byte constant VALUE. ALIGN is
31745 the alignment requirement of memory. Return TRUE if succeeded. */
31746 static bool
31747 arm_block_set_unaligned_vect (rtx dstbase,
31748 unsigned HOST_WIDE_INT length,
31749 unsigned HOST_WIDE_INT value,
31750 unsigned HOST_WIDE_INT align)
31752 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31753 rtx dst, mem;
31754 rtx val_elt, val_vec, reg;
31755 rtx rval[MAX_VECT_LEN];
31756 rtx (*gen_func) (rtx, rtx);
31757 machine_mode mode;
31758 unsigned HOST_WIDE_INT v = value;
31760 gcc_assert ((align & 0x3) != 0);
31761 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31762 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31763 if (length >= nelt_v16)
31765 mode = V16QImode;
31766 gen_func = gen_movmisalignv16qi;
31768 else
31770 mode = V8QImode;
31771 gen_func = gen_movmisalignv8qi;
31773 nelt_mode = GET_MODE_NUNITS (mode);
31774 gcc_assert (length >= nelt_mode);
31775 /* Skip if it isn't profitable. */
31776 if (!arm_block_set_vect_profit_p (length, align, mode))
31777 return false;
31779 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31780 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31782 v = sext_hwi (v, BITS_PER_WORD);
31783 val_elt = GEN_INT (v);
31784 for (j = 0; j < nelt_mode; j++)
31785 rval[j] = val_elt;
31787 reg = gen_reg_rtx (mode);
31788 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31789 /* Emit instruction loading the constant value. */
31790 emit_move_insn (reg, val_vec);
31792 /* Handle nelt_mode bytes in a vector. */
31793 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31795 emit_insn ((*gen_func) (mem, reg));
31796 if (i + 2 * nelt_mode <= length)
31797 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31800 /* If at least nelt_v8 bytes are left over, we must be in
31801 V16QImode. */
31802 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31804 /* Handle (8, 16) bytes leftover. */
31805 if (i + nelt_v8 < length)
31807 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31808 /* We are shifting bytes back, set the alignment accordingly. */
31809 if ((length & 1) != 0 && align >= 2)
31810 set_mem_align (mem, BITS_PER_UNIT);
31812 emit_insn (gen_movmisalignv16qi (mem, reg));
31814 /* Handle (0, 8] bytes leftover. */
31815 else if (i < length && i + nelt_v8 >= length)
31817 if (mode == V16QImode)
31819 reg = gen_lowpart (V8QImode, reg);
31820 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31822 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31823 + (nelt_mode - nelt_v8))));
31824 /* We are shifting bytes back, set the alignment accordingly. */
31825 if ((length & 1) != 0 && align >= 2)
31826 set_mem_align (mem, BITS_PER_UNIT);
31828 emit_insn (gen_movmisalignv8qi (mem, reg));
31831 return true;
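/* Illustration (editorial addition, not part of the original sources): the
   leftover handling above deals with a tail that is not a multiple of the
   vector size by sliding the destination back and issuing one extra,
   overlapping misaligned store that ends exactly at LENGTH.  E.g. for
   LENGTH == 13 with V8QImode stores, bytes 0-7 are stored first and the
   final store covers bytes 5-12.  A simplified host-side sketch assuming a
   single store size (the real code drops to V8QImode for a short tail of a
   V16QImode loop); names are hypothetical.  */

static void
model_unaligned_vect_stores (unsigned int length, unsigned int store_size,
                             unsigned int *n_full, int *has_tail,
                             unsigned int *tail_offset)
{
  /* Whole vector stores cover offsets 0, store_size, 2*store_size, ...  */
  *n_full = length / store_size;
  *has_tail = (length % store_size) != 0;
  /* The tail store is slid back so that it ends exactly at LENGTH,
     overlapping the previous store instead of running past the block.  */
  *tail_offset = *has_tail ? length - store_size : 0;
}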
31834 /* Set a block of memory using vectorization instructions for the
31835 aligned case. We fill the first LENGTH bytes of the memory area
31836 starting from DSTBASE with byte constant VALUE. ALIGN is the
31837 alignment requirement of memory. Return TRUE if succeeded. */
31838 static bool
31839 arm_block_set_aligned_vect (rtx dstbase,
31840 unsigned HOST_WIDE_INT length,
31841 unsigned HOST_WIDE_INT value,
31842 unsigned HOST_WIDE_INT align)
31844 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31845 rtx dst, addr, mem;
31846 rtx val_elt, val_vec, reg;
31847 rtx rval[MAX_VECT_LEN];
31848 machine_mode mode;
31849 unsigned HOST_WIDE_INT v = value;
31851 gcc_assert ((align & 0x3) == 0);
31852 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31853 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31854 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31855 mode = V16QImode;
31856 else
31857 mode = V8QImode;
31859 nelt_mode = GET_MODE_NUNITS (mode);
31860 gcc_assert (length >= nelt_mode);
31861 /* Skip if it isn't profitable. */
31862 if (!arm_block_set_vect_profit_p (length, align, mode))
31863 return false;
31865 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31867 v = sext_hwi (v, BITS_PER_WORD);
31868 val_elt = GEN_INT (v);
31869 for (j = 0; j < nelt_mode; j++)
31870 rval[j] = val_elt;
31872 reg = gen_reg_rtx (mode);
31873 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31874 /* Emit instruction loading the constant value. */
31875 emit_move_insn (reg, val_vec);
31877 i = 0;
31878 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31879 if (mode == V16QImode)
31881 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31882 emit_insn (gen_movmisalignv16qi (mem, reg));
31883 i += nelt_mode;
31884 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31885 if (i + nelt_v8 < length && i + nelt_v16 > length)
31887 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31888 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31889 /* We are shifting bytes back, set the alignment accordingly. */
31890 if ((length & 0x3) == 0)
31891 set_mem_align (mem, BITS_PER_UNIT * 4);
31892 else if ((length & 0x1) == 0)
31893 set_mem_align (mem, BITS_PER_UNIT * 2);
31894 else
31895 set_mem_align (mem, BITS_PER_UNIT);
31897 emit_insn (gen_movmisalignv16qi (mem, reg));
31898 return true;
31900 /* Fall through for bytes leftover. */
31901 mode = V8QImode;
31902 nelt_mode = GET_MODE_NUNITS (mode);
31903 reg = gen_lowpart (V8QImode, reg);
31906 /* Handle 8 bytes in a vector. */
31907 for (; (i + nelt_mode <= length); i += nelt_mode)
31909 addr = plus_constant (Pmode, dst, i);
31910 mem = adjust_automodify_address (dstbase, mode, addr, i);
31911 emit_move_insn (mem, reg);
31914 /* Handle single word leftover by shifting 4 bytes back. We can
31915 use aligned access for this case. */
31916 if (i + UNITS_PER_WORD == length)
31918 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31919 mem = adjust_automodify_address (dstbase, mode,
31920 addr, i - UNITS_PER_WORD);
31921 /* We are shifting 4 bytes back, set the alignment accordingly. */
31922 if (align > UNITS_PER_WORD)
31923 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31925 emit_move_insn (mem, reg);
31927 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31928 We have to use unaligned access for this case. */
31929 else if (i < length)
31931 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31932 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31933 /* We are shifting bytes back, set the alignment accordingly. */
31934 if ((length & 1) == 0)
31935 set_mem_align (mem, BITS_PER_UNIT * 2);
31936 else
31937 set_mem_align (mem, BITS_PER_UNIT);
31939 emit_insn (gen_movmisalignv8qi (mem, reg));
31942 return true;
31945 /* Set a block of memory using plain strh/strb instructions, only
31946 using instructions permitted by the alignment ALIGN on the processor. We fill the
31947 first LENGTH bytes of the memory area starting from DSTBASE
31948 with byte constant VALUE. ALIGN is the alignment requirement
31949 of memory. */
31950 static bool
31951 arm_block_set_unaligned_non_vect (rtx dstbase,
31952 unsigned HOST_WIDE_INT length,
31953 unsigned HOST_WIDE_INT value,
31954 unsigned HOST_WIDE_INT align)
31956 unsigned int i;
31957 rtx dst, addr, mem;
31958 rtx val_exp, val_reg, reg;
31959 machine_mode mode;
31960 HOST_WIDE_INT v = value;
31962 gcc_assert (align == 1 || align == 2);
31964 if (align == 2)
31965 v |= (value << BITS_PER_UNIT);
31967 v = sext_hwi (v, BITS_PER_WORD);
31968 val_exp = GEN_INT (v);
31969 /* Skip if it isn't profitable. */
31970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31971 align, true, false))
31972 return false;
31974 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31975 mode = (align == 2 ? HImode : QImode);
31976 val_reg = force_reg (SImode, val_exp);
31977 reg = gen_lowpart (mode, val_reg);
31979 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31981 addr = plus_constant (Pmode, dst, i);
31982 mem = adjust_automodify_address (dstbase, mode, addr, i);
31983 emit_move_insn (mem, reg);
31986 /* Handle single byte leftover. */
31987 if (i + 1 == length)
31989 reg = gen_lowpart (QImode, val_reg);
31990 addr = plus_constant (Pmode, dst, i);
31991 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31992 emit_move_insn (mem, reg);
31993 i++;
31996 gcc_assert (i == length);
31997 return true;
32000 /* Set a block of memory using plain strd/str/strh/strb instructions,
32001 to permit unaligned copies on processors which support unaligned
32002 semantics for those instructions. We fill the first LENGTH bytes
32003 of the memory area starting from DSTBASE with byte constant VALUE.
32004 ALIGN is the alignment requirement of memory. */
32005 static bool
32006 arm_block_set_aligned_non_vect (rtx dstbase,
32007 unsigned HOST_WIDE_INT length,
32008 unsigned HOST_WIDE_INT value,
32009 unsigned HOST_WIDE_INT align)
32011 unsigned int i;
32012 rtx dst, addr, mem;
32013 rtx val_exp, val_reg, reg;
32014 unsigned HOST_WIDE_INT v;
32015 bool use_strd_p;
32017 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32018 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32020 v = (value | (value << 8) | (value << 16) | (value << 24));
32021 if (length < UNITS_PER_WORD)
32022 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32024 if (use_strd_p)
32025 v |= (v << BITS_PER_WORD);
32026 else
32027 v = sext_hwi (v, BITS_PER_WORD);
32029 val_exp = GEN_INT (v);
32030 /* Skip if it isn't profitable. */
32031 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32032 align, false, use_strd_p))
32034 if (!use_strd_p)
32035 return false;
32037 /* Try without strd. */
32038 v = (v >> BITS_PER_WORD);
32039 v = sext_hwi (v, BITS_PER_WORD);
32040 val_exp = GEN_INT (v);
32041 use_strd_p = false;
32042 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32043 align, false, use_strd_p))
32044 return false;
32047 i = 0;
32048 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32049 /* Handle double words using strd if possible. */
32050 if (use_strd_p)
32052 val_reg = force_reg (DImode, val_exp);
32053 reg = val_reg;
32054 for (; (i + 8 <= length); i += 8)
32056 addr = plus_constant (Pmode, dst, i);
32057 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32058 emit_move_insn (mem, reg);
32061 else
32062 val_reg = force_reg (SImode, val_exp);
32064 /* Handle words. */
32065 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32066 for (; (i + 4 <= length); i += 4)
32068 addr = plus_constant (Pmode, dst, i);
32069 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32070 if ((align & 3) == 0)
32071 emit_move_insn (mem, reg);
32072 else
32073 emit_insn (gen_unaligned_storesi (mem, reg));
32076 /* Merge last pair of STRH and STRB into a STR if possible. */
32077 if (unaligned_access && i > 0 && (i + 3) == length)
32079 addr = plus_constant (Pmode, dst, i - 1);
32080 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32081 /* We are shifting one byte back, set the alignment accordingly. */
32082 if ((align & 1) == 0)
32083 set_mem_align (mem, BITS_PER_UNIT);
32085 /* Most likely this is an unaligned access, and we can't tell at
32086 compilation time. */
32087 emit_insn (gen_unaligned_storesi (mem, reg));
32088 return true;
32091 /* Handle half word leftover. */
32092 if (i + 2 <= length)
32094 reg = gen_lowpart (HImode, val_reg);
32095 addr = plus_constant (Pmode, dst, i);
32096 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32097 if ((align & 1) == 0)
32098 emit_move_insn (mem, reg);
32099 else
32100 emit_insn (gen_unaligned_storehi (mem, reg));
32102 i += 2;
32105 /* Handle single byte leftover. */
32106 if (i + 1 == length)
32108 reg = gen_lowpart (QImode, val_reg);
32109 addr = plus_constant (Pmode, dst, i);
32110 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32111 emit_move_insn (mem, reg);
32114 return true;
32117 /* Set a block of memory using vectorization instructions for both
32118 aligned and unaligned cases. We fill the first LENGTH bytes of
32119 the memory area starting from DSTBASE with byte constant VALUE.
32120 ALIGN is the alignment requirement of memory. */
32121 static bool
32122 arm_block_set_vect (rtx dstbase,
32123 unsigned HOST_WIDE_INT length,
32124 unsigned HOST_WIDE_INT value,
32125 unsigned HOST_WIDE_INT align)
32127 /* Check whether we need to use unaligned store instruction. */
32128 if (((align & 3) != 0 || (length & 3) != 0)
32129 /* Check whether unaligned store instruction is available. */
32130 && (!unaligned_access || BYTES_BIG_ENDIAN))
32131 return false;
32133 if ((align & 3) == 0)
32134 return arm_block_set_aligned_vect (dstbase, length, value, align);
32135 else
32136 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32139 /* Expand a string store operation. First we try to do it using
32140 vectorization instructions, then with ARM unaligned access and
32141 double-word stores if profitable. OPERANDS[0] is the destination,
32142 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32143 initialize the memory with, OPERANDS[3] is the known alignment of the
32144 destination. */
32145 bool
32146 arm_gen_setmem (rtx *operands)
32148 rtx dstbase = operands[0];
32149 unsigned HOST_WIDE_INT length;
32150 unsigned HOST_WIDE_INT value;
32151 unsigned HOST_WIDE_INT align;
32153 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32154 return false;
32156 length = UINTVAL (operands[1]);
32157 if (length > 64)
32158 return false;
32160 value = (UINTVAL (operands[2]) & 0xFF);
32161 align = UINTVAL (operands[3]);
32162 if (TARGET_NEON && length >= 8
32163 && current_tune->string_ops_prefer_neon
32164 && arm_block_set_vect (dstbase, length, value, align))
32165 return true;
32167 if (!unaligned_access && (align & 3) != 0)
32168 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32170 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32173 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32175 static unsigned HOST_WIDE_INT
32176 arm_asan_shadow_offset (void)
32178 return (unsigned HOST_WIDE_INT) 1 << 29;
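/* Illustration (editorial addition, not part of the original sources):
   AddressSanitizer maps every 8 bytes of application memory to one shadow
   byte, so with the offset returned above a shadow address is computed as
   (addr >> 3) + 0x20000000.  A host-side sketch of the mapping; the name is
   hypothetical.  */

static unsigned long long
model_asan_shadow_addr (unsigned long long app_addr)
{
  return (app_addr >> 3) + (1ULL << 29);
}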
32182 /* This is a temporary fix for PR60655. Ideally we need
32183 to handle most of these cases in the generic part but
32184 currently we reject minus (..) (sym_ref). We try to
32185 ameliorate the case with minus (sym_ref1) (sym_ref2)
32186 where they are in the same section. */
32188 static bool
32189 arm_const_not_ok_for_debug_p (rtx p)
32191 tree decl_op0 = NULL;
32192 tree decl_op1 = NULL;
32194 if (GET_CODE (p) == MINUS)
32196 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32198 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32199 if (decl_op1
32200 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32201 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32203 if ((TREE_CODE (decl_op1) == VAR_DECL
32204 || TREE_CODE (decl_op1) == CONST_DECL)
32205 && (TREE_CODE (decl_op0) == VAR_DECL
32206 || TREE_CODE (decl_op0) == CONST_DECL))
32207 return (get_variable_section (decl_op1, false)
32208 != get_variable_section (decl_op0, false));
32210 if (TREE_CODE (decl_op1) == LABEL_DECL
32211 && TREE_CODE (decl_op0) == LABEL_DECL)
32212 return (DECL_CONTEXT (decl_op1)
32213 != DECL_CONTEXT (decl_op0));
32216 return true;
32220 return false;
32223 static void
32224 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32226 const unsigned ARM_FE_INVALID = 1;
32227 const unsigned ARM_FE_DIVBYZERO = 2;
32228 const unsigned ARM_FE_OVERFLOW = 4;
32229 const unsigned ARM_FE_UNDERFLOW = 8;
32230 const unsigned ARM_FE_INEXACT = 16;
32231 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32232 | ARM_FE_DIVBYZERO
32233 | ARM_FE_OVERFLOW
32234 | ARM_FE_UNDERFLOW
32235 | ARM_FE_INEXACT);
32236 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32237 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32238 tree new_fenv_var, reload_fenv, restore_fnenv;
32239 tree update_call, atomic_feraiseexcept, hold_fnclex;
32241 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32242 return;
32244 /* Generate the equivalent of :
32245 unsigned int fenv_var;
32246 fenv_var = __builtin_arm_get_fpscr ();
32248 unsigned int masked_fenv;
32249 masked_fenv = fenv_var & mask;
32251 __builtin_arm_set_fpscr (masked_fenv); */
32253 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32254 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32255 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32256 mask = build_int_cst (unsigned_type_node,
32257 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32258 | ARM_FE_ALL_EXCEPT));
32259 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32260 fenv_var, build_call_expr (get_fpscr, 0));
32261 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32262 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32263 *hold = build2 (COMPOUND_EXPR, void_type_node,
32264 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32265 hold_fnclex);
32267 /* Store the value of masked_fenv to clear the exceptions:
32268 __builtin_arm_set_fpscr (masked_fenv); */
32270 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32272 /* Generate the equivalent of :
32273 unsigned int new_fenv_var;
32274 new_fenv_var = __builtin_arm_get_fpscr ();
32276 __builtin_arm_set_fpscr (fenv_var);
32278 __atomic_feraiseexcept (new_fenv_var); */
32280 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32281 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32282 build_call_expr (get_fpscr, 0));
32283 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32284 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32285 update_call = build_call_expr (atomic_feraiseexcept, 1,
32286 fold_convert (integer_type_node, new_fenv_var));
32287 *update = build2 (COMPOUND_EXPR, void_type_node,
32288 build2 (COMPOUND_EXPR, void_type_node,
32289 reload_fenv, restore_fnenv), update_call);
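/* Illustration (editorial addition, not part of the original sources): in
   the FPSCR the cumulative exception flags occupy bits 0-4 and the
   corresponding trap-enable bits occupy bits 8-12, which is what
   ARM_FE_EXCEPT_SHIFT == 8 expresses.  The hold mask built above therefore
   evaluates to 0xffffe0e0.  A host-side sketch of that computation; the
   name is hypothetical.  */

static unsigned int
model_fenv_hold_mask (void)
{
  const unsigned int all_except = 0x1f;   /* IOC|DZC|OFC|UFC|IXC flags.  */
  const unsigned int except_shift = 8;    /* Trap-enable bits start here.  */

  return ~((all_except << except_shift) | all_except);   /* 0xffffe0e0.  */
}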
32292 /* Return TRUE if X is a reference to a value in a constant pool. */
32293 extern bool
32294 arm_is_constant_pool_ref (rtx x)
32296 return (MEM_P (x)
32297 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32298 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32301 /* If MEM is in the form of [base+offset], extract the two parts
32302 of the address into BASE and OFFSET; otherwise return false
32303 after clearing BASE and OFFSET. */
32305 static bool
32306 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32308 rtx addr;
32310 gcc_assert (MEM_P (mem));
32312 addr = XEXP (mem, 0);
32314 /* Strip off const from addresses like (const (addr)). */
32315 if (GET_CODE (addr) == CONST)
32316 addr = XEXP (addr, 0);
32318 if (GET_CODE (addr) == REG)
32320 *base = addr;
32321 *offset = const0_rtx;
32322 return true;
32325 if (GET_CODE (addr) == PLUS
32326 && GET_CODE (XEXP (addr, 0)) == REG
32327 && CONST_INT_P (XEXP (addr, 1)))
32329 *base = XEXP (addr, 0);
32330 *offset = XEXP (addr, 1);
32331 return true;
32334 *base = NULL_RTX;
32335 *offset = NULL_RTX;
32337 return false;
32340 /* If INSN is a load or store whose address has the form [base+offset],
32341 extract the two parts into BASE and OFFSET. IS_LOAD is set
32342 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
32343 otherwise return FALSE. */
32345 static bool
32346 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32348 rtx x, dest, src;
32350 gcc_assert (INSN_P (insn));
32351 x = PATTERN (insn);
32352 if (GET_CODE (x) != SET)
32353 return false;
32355 src = SET_SRC (x);
32356 dest = SET_DEST (x);
32357 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32359 *is_load = false;
32360 extract_base_offset_in_addr (dest, base, offset);
32362 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32364 *is_load = true;
32365 extract_base_offset_in_addr (src, base, offset);
32367 else
32368 return false;
32370 return (*base != NULL_RTX && *offset != NULL_RTX);
32373 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
32375 Currently we only support fusing ldr and str instructions, so FUSION_PRI
32376 and PRI are only calculated for these instructions. For other instructions,
32377 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
32378 instruction fusion can be supported by returning different priorities.
32380 It's important that irrelevant instructions get the largest FUSION_PRI. */
32382 static void
32383 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
32384 int *fusion_pri, int *pri)
32386 int tmp, off_val;
32387 bool is_load;
32388 rtx base, offset;
32390 gcc_assert (INSN_P (insn));
32392 tmp = max_pri - 1;
32393 if (!fusion_load_store (insn, &base, &offset, &is_load))
32395 *pri = tmp;
32396 *fusion_pri = tmp;
32397 return;
32400 /* Load goes first. */
32401 if (is_load)
32402 *fusion_pri = tmp - 1;
32403 else
32404 *fusion_pri = tmp - 2;
32406 tmp /= 2;
32408 /* INSN with smaller base register goes first. */
32409 tmp -= ((REGNO (base) & 0xff) << 20);
32411 /* INSN with smaller offset goes first. */
32412 off_val = (int)(INTVAL (offset));
32413 if (off_val >= 0)
32414 tmp -= (off_val & 0xfffff);
32415 else
32416 tmp += ((- off_val) & 0xfffff);
32418 *pri = tmp;
32419 return;
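/* Illustration (editorial addition, not part of the original sources): the
   priority keys built above order fusion candidates so that loads come
   before stores, lower-numbered base registers come first and, within a
   base register, smaller offsets come first; per the comments above, the
   insn that should go first receives the larger value.  A host-side mirror
   of the computation, with hypothetical names.  */

static void
model_fusion_priorities (int max_pri, int is_load, unsigned int base_regno,
                         int offset, int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  /* Loads are preferred over stores.  */
  *fusion_pri = is_load ? tmp - 1 : tmp - 2;

  tmp /= 2;
  tmp -= (base_regno & 0xff) << 20;
  if (offset >= 0)
    tmp -= offset & 0xfffff;
  else
    tmp += (-offset) & 0xfffff;

  *pri = tmp;
}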
32421 #include "gt-arm.h"