[ARM] Initialise T16-related fields in Cortex-A8 tuning struct.
[official-gcc.git] / gcc / config / arm / arm.c
blob de457a36647c83d7752602798df69baca53e865d
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
67 void (*arm_lang_output_object_attributes_hook)(void);
69 struct four_ints
71 int i[4];
74 /* Forward function declarations. */
75 static bool arm_const_not_ok_for_debug_p (rtx);
76 static bool arm_lra_p (void);
77 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
78 static int arm_compute_static_chain_stack_bytes (void);
79 static arm_stack_offsets *arm_get_frame_offsets (void);
80 static void arm_add_gc_roots (void);
81 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
82 HOST_WIDE_INT, rtx, rtx, int, int);
83 static unsigned bit_count (unsigned long);
84 static int arm_address_register_rtx_p (rtx, int);
85 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
86 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
87 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
88 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
89 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
90 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
91 inline static int thumb1_index_register_rtx_p (rtx, int);
92 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
93 static int thumb_far_jump_used_p (void);
94 static bool thumb_force_lr_save (void);
95 static unsigned arm_size_return_regs (void);
96 static bool arm_assemble_integer (rtx, unsigned int, int);
97 static void arm_print_operand (FILE *, rtx, int);
98 static void arm_print_operand_address (FILE *, rtx);
99 static bool arm_print_operand_punct_valid_p (unsigned char code);
100 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
101 static arm_cc get_arm_condition_code (rtx);
102 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
103 static const char *output_multi_immediate (rtx *, const char *, const char *,
104 int, HOST_WIDE_INT);
105 static const char *shift_op (rtx, HOST_WIDE_INT *);
106 static struct machine_function *arm_init_machine_status (void);
107 static void thumb_exit (FILE *, int);
108 static HOST_WIDE_INT get_jump_table_size (rtx);
109 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_forward_ref (Mfix *);
111 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
112 static Mnode *add_minipool_backward_ref (Mfix *);
113 static void assign_minipool_offsets (Mfix *);
114 static void arm_print_value (FILE *, rtx);
115 static void dump_minipool (rtx);
116 static int arm_barrier_cost (rtx);
117 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
118 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
119 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
120 rtx);
121 static void arm_reorg (void);
122 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
123 static unsigned long arm_compute_save_reg0_reg12_mask (void);
124 static unsigned long arm_compute_save_reg_mask (void);
125 static unsigned long arm_isr_value (tree);
126 static unsigned long arm_compute_func_type (void);
127 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
129 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
130 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
131 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
132 #endif
133 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
134 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
135 static int arm_comp_type_attributes (const_tree, const_tree);
136 static void arm_set_default_type_attributes (tree);
137 static int arm_adjust_cost (rtx, rtx, rtx, int);
138 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
139 static int optimal_immediate_sequence (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence);
142 static int optimal_immediate_sequence_1 (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence,
145 int i);
146 static int arm_get_strip_length (int);
147 static bool arm_function_ok_for_sibcall (tree, tree);
148 static enum machine_mode arm_promote_function_mode (const_tree,
149 enum machine_mode, int *,
150 const_tree, int);
151 static bool arm_return_in_memory (const_tree, const_tree);
152 static rtx arm_function_value (const_tree, const_tree, bool);
153 static rtx arm_libcall_value_1 (enum machine_mode);
154 static rtx arm_libcall_value (enum machine_mode, const_rtx);
155 static bool arm_function_value_regno_p (const unsigned int);
156 static void arm_internal_label (FILE *, const char *, unsigned long);
157 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
158 tree);
159 static bool arm_have_conditional_execution (void);
160 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
161 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
162 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
163 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
164 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
169 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
170 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
171 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx safe_vector_operand (rtx, enum machine_mode);
175 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
176 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
177 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
178 static tree arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond, rtx pattern);
180 static rtx emit_set_insn (rtx, rtx);
181 static rtx emit_multi_reg_push (unsigned long, unsigned long);
182 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
183 tree, bool);
184 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
185 const_tree, bool);
186 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
187 const_tree, bool);
188 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
189 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
190 const_tree);
191 static rtx aapcs_libcall_value (enum machine_mode);
192 static int aapcs_select_return_coproc (const_tree, const_tree);
194 #ifdef OBJECT_FORMAT_ELF
195 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
196 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
197 #endif
198 #ifndef ARM_PE
199 static void arm_encode_section_info (tree, rtx, int);
200 #endif
202 static void arm_file_end (void);
203 static void arm_file_start (void);
205 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
206 tree, int *, int);
207 static bool arm_pass_by_reference (cumulative_args_t,
208 enum machine_mode, const_tree, bool);
209 static bool arm_promote_prototypes (const_tree);
210 static bool arm_default_short_enums (void);
211 static bool arm_align_anon_bitfield (void);
212 static bool arm_return_in_msb (const_tree);
213 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
214 static bool arm_return_in_memory (const_tree, const_tree);
215 #if ARM_UNWIND_INFO
216 static void arm_unwind_emit (FILE *, rtx);
217 static bool arm_output_ttype (rtx);
218 static void arm_asm_emit_except_personality (rtx);
219 static void arm_asm_init_sections (void);
220 #endif
221 static rtx arm_dwarf_register_span (rtx);
223 static tree arm_cxx_guard_type (void);
224 static bool arm_cxx_guard_mask_bit (void);
225 static tree arm_get_cookie_size (tree);
226 static bool arm_cookie_has_size (void);
227 static bool arm_cxx_cdtor_returns_this (void);
228 static bool arm_cxx_key_method_may_be_inline (void);
229 static void arm_cxx_determine_class_data_visibility (tree);
230 static bool arm_cxx_class_data_always_comdat (void);
231 static bool arm_cxx_use_aeabi_atexit (void);
232 static void arm_init_libfuncs (void);
233 static tree arm_build_builtin_va_list (void);
234 static void arm_expand_builtin_va_start (tree, rtx);
235 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
236 static void arm_option_override (void);
237 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
238 static bool arm_cannot_copy_insn_p (rtx);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
 295      affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
 321    multiple times.  */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
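/* Editorial example, not part of arm.c: the table above feeds GCC's generic
   attribute machinery, so user code can be annotated for instance as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     extern void far_away (void) __attribute__ ((long_call));

   "interrupt" is routed to arm_handle_isr_attribute; "long_call" has no
   handler (NULL above) and is simply recorded on the function type.  */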
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
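/* Editorial check of the comment above, not part of arm.c: an anchor can
   reach offsets -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes per
   block, and 8184 = 8 * 1023, so every block is a whole number of
   eight-byte units.  */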
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
679 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
681 struct gcc_target targetm = TARGET_INITIALIZER;
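/* Editorial note, not part of arm.c: each #undef/#define pair above
   replaces a default hook from target-def.h, and TARGET_INITIALIZER then
   expands to an aggregate initializer that collects all TARGET_* values
   into targetm; e.g. targetm.rtx_costs ends up pointing at arm_rtx_costs.  */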
683 /* Obstack for minipool constant handling. */
684 static struct obstack minipool_obstack;
685 static char * minipool_startobj;
687 /* The maximum number of insns skipped which
688 will be conditionalised if possible. */
689 static int max_insns_skipped = 5;
691 extern FILE * asm_out_file;
693 /* True if we are currently building a constant table. */
694 int making_const_table;
696 /* The processor for which instructions should be scheduled. */
697 enum processor_type arm_tune = arm_none;
699 /* The current tuning set. */
700 const struct tune_params *current_tune;
702 /* Which floating point hardware to schedule for. */
703 int arm_fpu_attr;
 705 /* Which floating point hardware to use.  */
706 const struct arm_fpu_desc *arm_fpu_desc;
708 /* Used for Thumb call_via trampolines. */
709 rtx thumb_call_via_label[14];
710 static int thumb_call_reg_needed;
712 /* Bit values used to identify processor capabilities. */
713 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
714 #define FL_ARCH3M (1 << 1) /* Extended multiply */
715 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
716 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
717 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
718 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
719 #define FL_THUMB (1 << 6) /* Thumb aware */
720 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
721 #define FL_STRONG (1 << 8) /* StrongARM */
722 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
723 #define FL_XSCALE (1 << 10) /* XScale */
724 /* spare (1 << 11) */
725 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
726 media instructions. */
727 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
728 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
729 Note: ARM6 & 7 derivatives only. */
730 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
731 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
732 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
733 profile. */
734 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
735 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
736 #define FL_NEON (1 << 20) /* Neon instructions. */
737 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
738 architecture. */
739 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
740 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
741 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
742 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
744 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
745 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
 747 /* Flags that only affect tuning, not available instructions.  */
748 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
749 | FL_CO_PROC)
751 #define FL_FOR_ARCH2 FL_NOTM
752 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
753 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
754 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
755 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
756 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
757 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
758 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
759 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
760 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
761 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
762 #define FL_FOR_ARCH6J FL_FOR_ARCH6
763 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
764 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
765 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
766 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
767 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
768 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
769 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
770 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
771 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
772 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
773 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
774 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
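/* Editorial sketch, not part of arm.c: arm_option_override later derives
   the arm_arch* globals by masking insn_flags with these bits, roughly

     arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
     arm_arch_notm   = (insn_flags & FL_NOTM) != 0;

   e.g. FL_FOR_ARCH7A carries FL_THUMB2 (via FL_FOR_ARCH6T2) and re-adds
   FL_NOTM, whereas FL_FOR_ARCH6M (= FL_FOR_ARCH6 & ~FL_NOTM) clears it,
   which is how the M-profile restriction is expressed.  */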
776 /* The bits in this mask specify which
777 instructions we are allowed to generate. */
778 static unsigned long insn_flags = 0;
780 /* The bits in this mask specify which instruction scheduling options should
781 be used. */
782 static unsigned long tune_flags = 0;
784 /* The highest ARM architecture version supported by the
785 target. */
786 enum base_architecture arm_base_arch = BASE_ARCH_0;
788 /* The following are used in the arm.md file as equivalents to bits
789 in the above two flag variables. */
791 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
792 int arm_arch3m = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
795 int arm_arch4 = 0;
797 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
798 int arm_arch4t = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
801 int arm_arch5 = 0;
803 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
804 int arm_arch5e = 0;
806 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
807 int arm_arch6 = 0;
809 /* Nonzero if this chip supports the ARM 6K extensions. */
810 int arm_arch6k = 0;
812 /* Nonzero if instructions present in ARMv6-M can be used. */
813 int arm_arch6m = 0;
815 /* Nonzero if this chip supports the ARM 7 extensions. */
816 int arm_arch7 = 0;
818 /* Nonzero if instructions not present in the 'M' profile can be used. */
819 int arm_arch_notm = 0;
821 /* Nonzero if instructions present in ARMv7E-M can be used. */
822 int arm_arch7em = 0;
824 /* Nonzero if instructions present in ARMv8 can be used. */
825 int arm_arch8 = 0;
827 /* Nonzero if this chip can benefit from load scheduling. */
828 int arm_ld_sched = 0;
830 /* Nonzero if this chip is a StrongARM. */
831 int arm_tune_strongarm = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX technology. */
834 int arm_arch_iwmmxt = 0;
836 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
837 int arm_arch_iwmmxt2 = 0;
839 /* Nonzero if this chip is an XScale. */
840 int arm_arch_xscale = 0;
 842 /* Nonzero if tuning for XScale.  */
843 int arm_tune_xscale = 0;
845 /* Nonzero if we want to tune for stores that access the write-buffer.
846 This typically means an ARM6 or ARM7 with MMU or MPU. */
847 int arm_tune_wbuf = 0;
849 /* Nonzero if tuning for Cortex-A9. */
850 int arm_tune_cortex_a9 = 0;
852 /* Nonzero if generating Thumb instructions. */
853 int thumb_code = 0;
855 /* Nonzero if generating Thumb-1 instructions. */
856 int thumb1_code = 0;
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
859 preprocessor.
860 XXX This is a bit of a hack, it's intended to help work around
861 problems in GLD which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork = 0;
865 /* Nonzero if chip supports Thumb 2. */
866 int arm_arch_thumb2;
868 /* Nonzero if chip supports integer division instruction. */
869 int arm_arch_arm_hwdiv;
870 int arm_arch_thumb_hwdiv;
 872 /* Nonzero if we should use Neon rather than core registers to handle
 873    64-bit operations.  */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 enum machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* The condition codes of the ARM, and the inverse function. */
908 static const char * const arm_condition_codes[] =
910 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
911 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
915 int arm_regs_in_sequence[] =
917 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
929 struct processors
931 const char *const name;
932 enum processor_type core;
933 const char *arch;
934 enum base_architecture base_arch;
935 const unsigned long flags;
936 const struct tune_params *const tune;
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
942 prefetch_slots, \
943 l1_size, \
944 l1_line_size
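/* Editorial example, not part of arm.c: these macros simply splice three
   tune_params fields.  With made-up figures,

     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)

   expands to "4, 32768, 64" (prefetch slots, L1 size, L1 line size),
   while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */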
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
 950   1,  /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs =
970 /* ALU */
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* rev. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
990 /* MULT SImode */
992 COSTS_N_INSNS (3), /* simple. */
993 COSTS_N_INSNS (3), /* flag_setting. */
994 COSTS_N_INSNS (2), /* extend. */
995 COSTS_N_INSNS (3), /* add. */
996 COSTS_N_INSNS (2), /* extend_add. */
997 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
999 /* MULT DImode */
1001 0, /* simple (N/A). */
1002 0, /* flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* extend. */
1004 0, /* add (N/A). */
1005 COSTS_N_INSNS (4), /* extend_add. */
1006 0 /* idiv (N/A). */
1009 /* LD/ST */
1011 COSTS_N_INSNS (2), /* load. */
1012 COSTS_N_INSNS (2), /* load_sign_extend. */
1013 COSTS_N_INSNS (2), /* ldrd. */
1014 COSTS_N_INSNS (2), /* ldm_1st. */
1015 1, /* ldm_regs_per_insn_1st. */
1016 2, /* ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* loadf. */
1018 COSTS_N_INSNS (5), /* loadd. */
1019 COSTS_N_INSNS (1), /* load_unaligned. */
1020 COSTS_N_INSNS (2), /* store. */
1021 COSTS_N_INSNS (2), /* strd. */
1022 COSTS_N_INSNS (2), /* stm_1st. */
1023 1, /* stm_regs_per_insn_1st. */
1024 2, /* stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* storef. */
1026 COSTS_N_INSNS (1), /* stored. */
1027 COSTS_N_INSNS (1) /* store_unaligned. */
1030 /* FP SFmode */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1046 /* FP DFmode */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1063 /* Vector */
1065 COSTS_N_INSNS (1) /* alu. */
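/* Editorial note, not part of arm.c: entries in these per-core tables are
   extra costs layered on top of a baseline insn cost and are expressed
   with COSTS_N_INSNS from rtl.h, where COSTS_N_INSNS (N) is N * 4.  So the
   Cortex-A9 SImode "simple" multiply above is costed three instructions
   dearer than a plain arithmetic op, whose extra cost is 0.  */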
1069 const struct cpu_cost_table cortexa8_extra_costs =
1071 /* ALU */
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 0, /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1081 0, /* extend. */
1082 0, /* extend_arith. */
1083 0, /* bfi. */
1084 0, /* bfx. */
1085 0, /* clz. */
1086 0, /* rev. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1091 /* MULT SImode */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1100 /* MULT DImode */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1105 0, /* add (N/A). */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1110 /* LD/ST */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1131 /* FP SFmode */
1133 COSTS_N_INSNS (36), /* div. */
1134 COSTS_N_INSNS (11), /* mult. */
1135 COSTS_N_INSNS (20), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (9), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (6), /* compare. */
1141 COSTS_N_INSNS (4), /* widen. */
1142 COSTS_N_INSNS (4), /* narrow. */
1143 COSTS_N_INSNS (8), /* toint. */
1144 COSTS_N_INSNS (8), /* fromint. */
1145 COSTS_N_INSNS (8) /* roundint. */
1147 /* FP DFmode */
1149 COSTS_N_INSNS (64), /* div. */
1150 COSTS_N_INSNS (16), /* mult. */
1151 COSTS_N_INSNS (25), /* mult_addsub. */
1152 COSTS_N_INSNS (30), /* fma. */
1153 COSTS_N_INSNS (9), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (6), /* compare. */
1157 COSTS_N_INSNS (6), /* widen. */
1158 COSTS_N_INSNS (6), /* narrow. */
1159 COSTS_N_INSNS (8), /* toint. */
1160 COSTS_N_INSNS (8), /* fromint. */
1161 COSTS_N_INSNS (8) /* roundint. */
1164 /* Vector */
1166 COSTS_N_INSNS (1) /* alu. */
1172 const struct cpu_cost_table cortexa7_extra_costs =
1174 /* ALU */
1176 0, /* arith. */
1177 0, /* logical. */
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1190 0, /* non_exec. */
1191 true /* non_exec_costs_exec. */
1195 /* MULT SImode */
1197 0, /* simple. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1204 /* MULT DImode */
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1209 0, /* add. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1211 0 /* idiv (N/A). */
1214 /* LD/ST */
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (3), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (2), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1) /* store_unaligned. */
1235 /* FP SFmode */
1237 COSTS_N_INSNS (15), /* div. */
1238 COSTS_N_INSNS (3), /* mult. */
1239 COSTS_N_INSNS (7), /* mult_addsub. */
1240 COSTS_N_INSNS (7), /* fma. */
1241 COSTS_N_INSNS (3), /* addsub. */
1242 COSTS_N_INSNS (3), /* fpconst. */
1243 COSTS_N_INSNS (3), /* neg. */
1244 COSTS_N_INSNS (3), /* compare. */
1245 COSTS_N_INSNS (3), /* widen. */
1246 COSTS_N_INSNS (3), /* narrow. */
1247 COSTS_N_INSNS (3), /* toint. */
1248 COSTS_N_INSNS (3), /* fromint. */
1249 COSTS_N_INSNS (3) /* roundint. */
1251 /* FP DFmode */
1253 COSTS_N_INSNS (30), /* div. */
1254 COSTS_N_INSNS (6), /* mult. */
1255 COSTS_N_INSNS (10), /* mult_addsub. */
1256 COSTS_N_INSNS (7), /* fma. */
1257 COSTS_N_INSNS (3), /* addsub. */
1258 COSTS_N_INSNS (3), /* fpconst. */
1259 COSTS_N_INSNS (3), /* neg. */
1260 COSTS_N_INSNS (3), /* compare. */
1261 COSTS_N_INSNS (3), /* widen. */
1262 COSTS_N_INSNS (3), /* narrow. */
1263 COSTS_N_INSNS (3), /* toint. */
1264 COSTS_N_INSNS (3), /* fromint. */
1265 COSTS_N_INSNS (3) /* roundint. */
1268 /* Vector */
1270 COSTS_N_INSNS (1) /* alu. */
1274 const struct cpu_cost_table cortexa12_extra_costs =
1276 /* ALU */
1278 0, /* arith. */
1279 0, /* logical. */
1280 0, /* shift. */
1281 COSTS_N_INSNS (1), /* shift_reg. */
1282 COSTS_N_INSNS (1), /* arith_shift. */
1283 COSTS_N_INSNS (1), /* arith_shift_reg. */
1284 COSTS_N_INSNS (1), /* log_shift. */
1285 COSTS_N_INSNS (1), /* log_shift_reg. */
1286 0, /* extend. */
1287 COSTS_N_INSNS (1), /* extend_arith. */
1288 0, /* bfi. */
1289 COSTS_N_INSNS (1), /* bfx. */
1290 COSTS_N_INSNS (1), /* clz. */
1291 COSTS_N_INSNS (1), /* rev. */
1292 0, /* non_exec. */
1293 true /* non_exec_costs_exec. */
1295 /* MULT SImode */
1298 COSTS_N_INSNS (2), /* simple. */
1299 COSTS_N_INSNS (3), /* flag_setting. */
1300 COSTS_N_INSNS (2), /* extend. */
1301 COSTS_N_INSNS (3), /* add. */
1302 COSTS_N_INSNS (2), /* extend_add. */
1303 COSTS_N_INSNS (18) /* idiv. */
1305 /* MULT DImode */
1307 0, /* simple (N/A). */
1308 0, /* flag_setting (N/A). */
1309 COSTS_N_INSNS (3), /* extend. */
1310 0, /* add (N/A). */
1311 COSTS_N_INSNS (3), /* extend_add. */
1312 0 /* idiv (N/A). */
1315 /* LD/ST */
1317 COSTS_N_INSNS (3), /* load. */
1318 COSTS_N_INSNS (3), /* load_sign_extend. */
1319 COSTS_N_INSNS (3), /* ldrd. */
1320 COSTS_N_INSNS (3), /* ldm_1st. */
1321 1, /* ldm_regs_per_insn_1st. */
1322 2, /* ldm_regs_per_insn_subsequent. */
1323 COSTS_N_INSNS (3), /* loadf. */
1324 COSTS_N_INSNS (3), /* loadd. */
1325 0, /* load_unaligned. */
1326 0, /* store. */
1327 0, /* strd. */
1328 0, /* stm_1st. */
1329 1, /* stm_regs_per_insn_1st. */
1330 2, /* stm_regs_per_insn_subsequent. */
1331 COSTS_N_INSNS (2), /* storef. */
1332 COSTS_N_INSNS (2), /* stored. */
1333 0 /* store_unaligned. */
1336 /* FP SFmode */
1338 COSTS_N_INSNS (17), /* div. */
1339 COSTS_N_INSNS (4), /* mult. */
1340 COSTS_N_INSNS (8), /* mult_addsub. */
1341 COSTS_N_INSNS (8), /* fma. */
1342 COSTS_N_INSNS (4), /* addsub. */
1343 COSTS_N_INSNS (2), /* fpconst. */
1344 COSTS_N_INSNS (2), /* neg. */
1345 COSTS_N_INSNS (2), /* compare. */
1346 COSTS_N_INSNS (4), /* widen. */
1347 COSTS_N_INSNS (4), /* narrow. */
1348 COSTS_N_INSNS (4), /* toint. */
1349 COSTS_N_INSNS (4), /* fromint. */
1350 COSTS_N_INSNS (4) /* roundint. */
1352 /* FP DFmode */
1354 COSTS_N_INSNS (31), /* div. */
1355 COSTS_N_INSNS (4), /* mult. */
1356 COSTS_N_INSNS (8), /* mult_addsub. */
1357 COSTS_N_INSNS (8), /* fma. */
1358 COSTS_N_INSNS (4), /* addsub. */
1359 COSTS_N_INSNS (2), /* fpconst. */
1360 COSTS_N_INSNS (2), /* neg. */
1361 COSTS_N_INSNS (2), /* compare. */
1362 COSTS_N_INSNS (4), /* widen. */
1363 COSTS_N_INSNS (4), /* narrow. */
1364 COSTS_N_INSNS (4), /* toint. */
1365 COSTS_N_INSNS (4), /* fromint. */
1366 COSTS_N_INSNS (4) /* roundint. */
1369 /* Vector */
1371 COSTS_N_INSNS (1) /* alu. */
1375 const struct cpu_cost_table cortexa15_extra_costs =
1377 /* ALU */
1379 0, /* arith. */
1380 0, /* logical. */
1381 0, /* shift. */
1382 0, /* shift_reg. */
1383 COSTS_N_INSNS (1), /* arith_shift. */
1384 COSTS_N_INSNS (1), /* arith_shift_reg. */
1385 COSTS_N_INSNS (1), /* log_shift. */
1386 COSTS_N_INSNS (1), /* log_shift_reg. */
1387 0, /* extend. */
1388 COSTS_N_INSNS (1), /* extend_arith. */
1389 COSTS_N_INSNS (1), /* bfi. */
1390 0, /* bfx. */
1391 0, /* clz. */
1392 0, /* rev. */
1393 0, /* non_exec. */
1394 true /* non_exec_costs_exec. */
1396 /* MULT SImode */
1399 COSTS_N_INSNS (2), /* simple. */
1400 COSTS_N_INSNS (3), /* flag_setting. */
1401 COSTS_N_INSNS (2), /* extend. */
1402 COSTS_N_INSNS (2), /* add. */
1403 COSTS_N_INSNS (2), /* extend_add. */
1404 COSTS_N_INSNS (18) /* idiv. */
1406 /* MULT DImode */
1408 0, /* simple (N/A). */
1409 0, /* flag_setting (N/A). */
1410 COSTS_N_INSNS (3), /* extend. */
1411 0, /* add (N/A). */
1412 COSTS_N_INSNS (3), /* extend_add. */
1413 0 /* idiv (N/A). */
1416 /* LD/ST */
1418 COSTS_N_INSNS (3), /* load. */
1419 COSTS_N_INSNS (3), /* load_sign_extend. */
1420 COSTS_N_INSNS (3), /* ldrd. */
1421 COSTS_N_INSNS (4), /* ldm_1st. */
1422 1, /* ldm_regs_per_insn_1st. */
1423 2, /* ldm_regs_per_insn_subsequent. */
1424 COSTS_N_INSNS (4), /* loadf. */
1425 COSTS_N_INSNS (4), /* loadd. */
1426 0, /* load_unaligned. */
1427 0, /* store. */
1428 0, /* strd. */
1429 COSTS_N_INSNS (1), /* stm_1st. */
1430 1, /* stm_regs_per_insn_1st. */
1431 2, /* stm_regs_per_insn_subsequent. */
1432 0, /* storef. */
1433 0, /* stored. */
1434 0 /* store_unaligned. */
1437 /* FP SFmode */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (5), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1453 /* FP DFmode */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1470 /* Vector */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table v7m_extra_costs =
1478 /* ALU */
1480 0, /* arith. */
1481 0, /* logical. */
1482 0, /* shift. */
1483 0, /* shift_reg. */
1484 0, /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 0, /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1488 0, /* extend. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 0, /* bfi. */
1491 0, /* bfx. */
1492 0, /* clz. */
1493 0, /* rev. */
1494 COSTS_N_INSNS (1), /* non_exec. */
1495 false /* non_exec_costs_exec. */
1498 /* MULT SImode */
1500 COSTS_N_INSNS (1), /* simple. */
1501 COSTS_N_INSNS (1), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (1), /* add. */
1504 COSTS_N_INSNS (3), /* extend_add. */
1505 COSTS_N_INSNS (8) /* idiv. */
1507 /* MULT DImode */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (2), /* extend. */
1512 0, /* add (N/A). */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 0 /* idiv (N/A). */
1517 /* LD/ST */
1519 COSTS_N_INSNS (2), /* load. */
1520 0, /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (2), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 1, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (2), /* loadf. */
1526 COSTS_N_INSNS (3), /* loadd. */
1527 COSTS_N_INSNS (1), /* load_unaligned. */
1528 COSTS_N_INSNS (2), /* store. */
1529 COSTS_N_INSNS (3), /* strd. */
1530 COSTS_N_INSNS (2), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 1, /* stm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (2), /* storef. */
1534 COSTS_N_INSNS (3), /* stored. */
1535 COSTS_N_INSNS (1) /* store_unaligned. */
1538 /* FP SFmode */
1540 COSTS_N_INSNS (7), /* div. */
1541 COSTS_N_INSNS (2), /* mult. */
1542 COSTS_N_INSNS (5), /* mult_addsub. */
1543 COSTS_N_INSNS (3), /* fma. */
1544 COSTS_N_INSNS (1), /* addsub. */
1545 0, /* fpconst. */
1546 0, /* neg. */
1547 0, /* compare. */
1548 0, /* widen. */
1549 0, /* narrow. */
1550 0, /* toint. */
1551 0, /* fromint. */
1552 0 /* roundint. */
1554 /* FP DFmode */
1556 COSTS_N_INSNS (15), /* div. */
1557 COSTS_N_INSNS (5), /* mult. */
1558 COSTS_N_INSNS (7), /* mult_addsub. */
1559 COSTS_N_INSNS (7), /* fma. */
1560 COSTS_N_INSNS (3), /* addsub. */
1561 0, /* fpconst. */
1562 0, /* neg. */
1563 0, /* compare. */
1564 0, /* widen. */
1565 0, /* narrow. */
1566 0, /* toint. */
1567 0, /* fromint. */
1568 0 /* roundint. */
1571 /* Vector */
1573 COSTS_N_INSNS (1) /* alu. */
1577 const struct tune_params arm_slowmul_tune =
1579 arm_slowmul_rtx_costs,
1580 NULL,
1581 NULL, /* Sched adj cost. */
1582 3, /* Constant limit. */
1583 5, /* Max cond insns. */
1584 ARM_PREFETCH_NOT_BENEFICIAL,
1585 true, /* Prefer constant pool. */
1586 arm_default_branch_cost,
1587 false, /* Prefer LDRD/STRD. */
1588 {true, true}, /* Prefer non short circuit. */
1589 &arm_default_vec_cost, /* Vectorizer costs. */
1590 false, /* Prefer Neon for 64-bits bitops. */
1591 false, false /* Prefer 32-bit encodings. */
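/* Editorial note, not part of arm.c: every tune_params initializer below
   fills the same positional fields, as the trailing comments indicate: an
   rtx-costs callback, an optional per-core extra-cost table, an optional
   scheduler cost-adjust hook, the constant limit, the maximum number of
   insns to conditionalise, prefetch parameters, the constant-pool
   preference, a branch-cost hook, and further preference flags ending with
   the vectoriser cost table, the "prefer Neon for 64-bit bitops" flag and
   the "prefer 32-bit encodings" flags.  */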
1594 const struct tune_params arm_fastmul_tune =
1596 arm_fastmul_rtx_costs,
1597 NULL,
1598 NULL, /* Sched adj cost. */
1599 1, /* Constant limit. */
1600 5, /* Max cond insns. */
1601 ARM_PREFETCH_NOT_BENEFICIAL,
1602 true, /* Prefer constant pool. */
1603 arm_default_branch_cost,
1604 false, /* Prefer LDRD/STRD. */
1605 {true, true}, /* Prefer non short circuit. */
1606 &arm_default_vec_cost, /* Vectorizer costs. */
1607 false, /* Prefer Neon for 64-bits bitops. */
1608 false, false /* Prefer 32-bit encodings. */
1611 /* StrongARM has early execution of branches, so a sequence that is worth
1612 skipping is shorter. Set max_insns_skipped to a lower value. */
1614 const struct tune_params arm_strongarm_tune =
1616 arm_fastmul_rtx_costs,
1617 NULL,
1618 NULL, /* Sched adj cost. */
1619 1, /* Constant limit. */
1620 3, /* Max cond insns. */
1621 ARM_PREFETCH_NOT_BENEFICIAL,
1622 true, /* Prefer constant pool. */
1623 arm_default_branch_cost,
1624 false, /* Prefer LDRD/STRD. */
1625 {true, true}, /* Prefer non short circuit. */
1626 &arm_default_vec_cost, /* Vectorizer costs. */
1627 false, /* Prefer Neon for 64-bits bitops. */
1628 false, false /* Prefer 32-bit encodings. */
1631 const struct tune_params arm_xscale_tune =
1633 arm_xscale_rtx_costs,
1634 NULL,
1635 xscale_sched_adjust_cost,
1636 2, /* Constant limit. */
1637 3, /* Max cond insns. */
1638 ARM_PREFETCH_NOT_BENEFICIAL,
1639 true, /* Prefer constant pool. */
1640 arm_default_branch_cost,
1641 false, /* Prefer LDRD/STRD. */
1642 {true, true}, /* Prefer non short circuit. */
1643 &arm_default_vec_cost, /* Vectorizer costs. */
1644 false, /* Prefer Neon for 64-bits bitops. */
1645 false, false /* Prefer 32-bit encodings. */
1648 const struct tune_params arm_9e_tune =
1650 arm_9e_rtx_costs,
1651 NULL,
1652 NULL, /* Sched adj cost. */
1653 1, /* Constant limit. */
1654 5, /* Max cond insns. */
1655 ARM_PREFETCH_NOT_BENEFICIAL,
1656 true, /* Prefer constant pool. */
1657 arm_default_branch_cost,
1658 false, /* Prefer LDRD/STRD. */
1659 {true, true}, /* Prefer non short circuit. */
1660 &arm_default_vec_cost, /* Vectorizer costs. */
1661 false, /* Prefer Neon for 64-bits bitops. */
1662 false, false /* Prefer 32-bit encodings. */
1665 const struct tune_params arm_v6t2_tune =
1667 arm_9e_rtx_costs,
1668 NULL,
1669 NULL, /* Sched adj cost. */
1670 1, /* Constant limit. */
1671 5, /* Max cond insns. */
1672 ARM_PREFETCH_NOT_BENEFICIAL,
1673 false, /* Prefer constant pool. */
1674 arm_default_branch_cost,
1675 false, /* Prefer LDRD/STRD. */
1676 {true, true}, /* Prefer non short circuit. */
1677 &arm_default_vec_cost, /* Vectorizer costs. */
1678 false, /* Prefer Neon for 64-bits bitops. */
1679 false, false /* Prefer 32-bit encodings. */
1682 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1683 const struct tune_params arm_cortex_tune =
1685 arm_9e_rtx_costs,
1686 &generic_extra_costs,
1687 NULL, /* Sched adj cost. */
1688 1, /* Constant limit. */
1689 5, /* Max cond insns. */
1690 ARM_PREFETCH_NOT_BENEFICIAL,
1691 false, /* Prefer constant pool. */
1692 arm_default_branch_cost,
1693 false, /* Prefer LDRD/STRD. */
1694 {true, true}, /* Prefer non short circuit. */
1695 &arm_default_vec_cost, /* Vectorizer costs. */
1696 false, /* Prefer Neon for 64-bits bitops. */
1697 false, false /* Prefer 32-bit encodings. */
1700 const struct tune_params arm_cortex_a8_tune =
1702 arm_9e_rtx_costs,
1703 &cortexa8_extra_costs,
1704 NULL, /* Sched adj cost. */
1705 1, /* Constant limit. */
1706 5, /* Max cond insns. */
1707 ARM_PREFETCH_NOT_BENEFICIAL,
1708 false, /* Prefer constant pool. */
1709 arm_default_branch_cost,
1710 false, /* Prefer LDRD/STRD. */
1711 {true, true}, /* Prefer non short circuit. */
1712 &arm_default_vec_cost, /* Vectorizer costs. */
1713 false, /* Prefer Neon for 64-bits bitops. */
1714 false, false /* Prefer 32-bit encodings. */
1717 const struct tune_params arm_cortex_a7_tune =
1719 arm_9e_rtx_costs,
1720 &cortexa7_extra_costs,
1721 NULL,
1722 1, /* Constant limit. */
1723 5, /* Max cond insns. */
1724 ARM_PREFETCH_NOT_BENEFICIAL,
1725 false, /* Prefer constant pool. */
1726 arm_default_branch_cost,
1727 false, /* Prefer LDRD/STRD. */
1728 {true, true}, /* Prefer non short circuit. */
1729 &arm_default_vec_cost, /* Vectorizer costs. */
1730 false, /* Prefer Neon for 64-bits bitops. */
1731 false, false /* Prefer 32-bit encodings. */
1734 const struct tune_params arm_cortex_a15_tune =
1736 arm_9e_rtx_costs,
1737 &cortexa15_extra_costs,
1738 NULL, /* Sched adj cost. */
1739 1, /* Constant limit. */
1740 2, /* Max cond insns. */
1741 ARM_PREFETCH_NOT_BENEFICIAL,
1742 false, /* Prefer constant pool. */
1743 arm_default_branch_cost,
1744 true, /* Prefer LDRD/STRD. */
1745 {true, true}, /* Prefer non short circuit. */
1746 &arm_default_vec_cost, /* Vectorizer costs. */
1747 false, /* Prefer Neon for 64-bits bitops. */
1748 true, true /* Prefer 32-bit encodings. */
1751 const struct tune_params arm_cortex_a53_tune =
1753 arm_9e_rtx_costs,
1754 &cortexa53_extra_costs,
1755 NULL, /* Scheduler cost adjustment. */
1756 1, /* Constant limit. */
1757 5, /* Max cond insns. */
1758 ARM_PREFETCH_NOT_BENEFICIAL,
1759 false, /* Prefer constant pool. */
1760 arm_default_branch_cost,
1761 false, /* Prefer LDRD/STRD. */
1762 {true, true}, /* Prefer non short circuit. */
1763 &arm_default_vec_cost, /* Vectorizer costs. */
1764 false, /* Prefer Neon for 64-bits bitops. */
1765 false, false /* Prefer 32-bit encodings. */
1768 const struct tune_params arm_cortex_a57_tune =
1770 arm_9e_rtx_costs,
1771 &cortexa57_extra_costs,
1772 NULL, /* Scheduler cost adjustment. */
1773 1, /* Constant limit. */
1774 2, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 false, /* Prefer constant pool. */
1777 arm_default_branch_cost,
1778 true, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 true, true /* Prefer 32-bit encodings. */
1785 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1786 less appealing. Set max_insns_skipped to a low value. */
1788 const struct tune_params arm_cortex_a5_tune =
1790 arm_9e_rtx_costs,
1791 NULL,
1792 NULL, /* Sched adj cost. */
1793 1, /* Constant limit. */
1794 1, /* Max cond insns. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 false, /* Prefer constant pool. */
1797 arm_cortex_a5_branch_cost,
1798 false, /* Prefer LDRD/STRD. */
1799 {false, false}, /* Prefer non short circuit. */
1800 &arm_default_vec_cost, /* Vectorizer costs. */
1801 false, /* Prefer Neon for 64-bits bitops. */
1802 false, false /* Prefer 32-bit encodings. */
1805 const struct tune_params arm_cortex_a9_tune =
1807 arm_9e_rtx_costs,
1808 &cortexa9_extra_costs,
1809 cortex_a9_sched_adjust_cost,
1810 1, /* Constant limit. */
1811 5, /* Max cond insns. */
1812 ARM_PREFETCH_BENEFICIAL(4,32,32),
1813 false, /* Prefer constant pool. */
1814 arm_default_branch_cost,
1815 false, /* Prefer LDRD/STRD. */
1816 {true, true}, /* Prefer non short circuit. */
1817 &arm_default_vec_cost, /* Vectorizer costs. */
1818 false, /* Prefer Neon for 64-bits bitops. */
1819 false, false /* Prefer 32-bit encodings. */
1822 const struct tune_params arm_cortex_a12_tune =
1824 arm_9e_rtx_costs,
1825 &cortexa12_extra_costs,
1826 NULL,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 ARM_PREFETCH_BENEFICIAL(4,32,32),
1830 false, /* Prefer constant pool. */
1831 arm_default_branch_cost,
1832 true, /* Prefer LDRD/STRD. */
1833 {true, true}, /* Prefer non short circuit. */
1834 &arm_default_vec_cost, /* Vectorizer costs. */
1835 false, /* Prefer Neon for 64-bits bitops. */
1836 false, false /* Prefer 32-bit encodings. */
1839 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1840 cycle to execute each. An LDR from the constant pool also takes two cycles
1841 to execute, but mildly increases pipelining opportunity (consecutive
1842 loads/stores can be pipelined together, saving one cycle), and may also
1843 improve icache utilisation. Hence we prefer the constant pool for such
1844 processors. */
1846 const struct tune_params arm_v7m_tune =
1848 arm_9e_rtx_costs,
1849 &v7m_extra_costs,
1850 NULL, /* Sched adj cost. */
1851 1, /* Constant limit. */
1852 2, /* Max cond insns. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 true, /* Prefer constant pool. */
1855 arm_cortex_m_branch_cost,
1856 false, /* Prefer LDRD/STRD. */
1857 {false, false}, /* Prefer non short circuit. */
1858 &arm_default_vec_cost, /* Vectorizer costs. */
1859 false, /* Prefer Neon for 64-bits bitops. */
1860 false, false /* Prefer 32-bit encodings. */
1863 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1864 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1865 const struct tune_params arm_v6m_tune =
1867 arm_9e_rtx_costs,
1868 NULL,
1869 NULL, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 false, /* Prefer LDRD/STRD. */
1876 {false, false}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false /* Prefer 32-bit encodings. */
1882 const struct tune_params arm_fa726te_tune =
1884 arm_9e_rtx_costs,
1885 NULL,
1886 fa726te_sched_adjust_cost,
1887 1, /* Constant limit. */
1888 5, /* Max cond insns. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 true, /* Prefer constant pool. */
1891 arm_default_branch_cost,
1892 false, /* Prefer LDRD/STRD. */
1893 {true, true}, /* Prefer non short circuit. */
1894 &arm_default_vec_cost, /* Vectorizer costs. */
1895 false, /* Prefer Neon for 64-bits bitops. */
1896 false, false /* Prefer 32-bit encodings. */
1900 /* Not all of these give usefully different compilation alternatives,
1901 but there is no simple way of generalizing them. */
1902 static const struct processors all_cores[] =
1904 /* ARM Cores */
1905 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1906 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1907 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1908 #include "arm-cores.def"
1909 #undef ARM_CORE
1910 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
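  /* For illustration only -- the real entries live in arm-cores.def.  A line
     there along the lines of

         ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, flags, cortex_a8)

     would expand, via the ARM_CORE macro above, to the table entry

         {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
          flags | FL_FOR_ARCH7A, &arm_cortex_a8_tune},

     tying the -mcpu= name to its architecture flags and tuning table.  */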
1913 static const struct processors all_architectures[] =
1915 /* ARM Architectures */
1916 /* We don't specify tuning costs here as it will be figured out
1917 from the core. */
1919 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1920 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1921 #include "arm-arches.def"
1922 #undef ARM_ARCH
1923 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1927 /* These are populated as commandline arguments are processed, or NULL
1928 if not specified. */
1929 static const struct processors *arm_selected_arch;
1930 static const struct processors *arm_selected_cpu;
1931 static const struct processors *arm_selected_tune;
1933 /* The name of the preprocessor macro to define for this architecture. */
1935 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1937 /* Available values for -mfpu=. */
1939 static const struct arm_fpu_desc all_fpus[] =
1941 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1942 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1943 #include "arm-fpus.def"
1944 #undef ARM_FPU
1948 /* Supported TLS relocations. */
1950 enum tls_reloc {
1951 TLS_GD32,
1952 TLS_LDM32,
1953 TLS_LDO32,
1954 TLS_IE32,
1955 TLS_LE32,
1956 TLS_DESCSEQ /* GNU scheme */
1959 /* The maximum number of insns to be used when loading a constant. */
1960 inline static int
1961 arm_constant_limit (bool size_p)
1963 return size_p ? 1 : current_tune->constant_limit;
1966 /* Emit an insn that's a simple single-set. Both the operands must be known
1967 to be valid. */
1968 inline static rtx
1969 emit_set_insn (rtx x, rtx y)
1971 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1974 /* Return the number of bits set in VALUE. */
1975 static unsigned
1976 bit_count (unsigned long value)
1978 unsigned long count = 0;
1980 while (value)
1982 count++;
1983 value &= value - 1; /* Clear the least-significant set bit. */
1986 return count;
1989 typedef struct
1991 enum machine_mode mode;
1992 const char *name;
1993 } arm_fixed_mode_set;
1995 /* A small helper for setting fixed-point library libfuncs. */
1997 static void
1998 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1999 const char *funcname, const char *modename,
2000 int num_suffix)
2002 char buffer[50];
2004 if (num_suffix == 0)
2005 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2006 else
2007 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2009 set_optab_libfunc (optable, mode, buffer);
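
  /* Sketch of the names this produces (modes chosen purely for
     illustration): called with ("ssadd", "sa", 3) it registers
     "__gnu_ssaddsa3", and with ("neg", "qq", 2) it registers
     "__gnu_negqq2", following the __gnu_-prefixed naming scheme used for
     the fixed-point helpers.  */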
2012 static void
2013 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2014 enum machine_mode from, const char *funcname,
2015 const char *toname, const char *fromname)
2017 char buffer[50];
2018 const char *maybe_suffix_2 = "";
2020 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2021 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2022 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2023 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2024 maybe_suffix_2 = "2";
2026 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2027 maybe_suffix_2);
2029 set_conv_libfunc (optable, to, from, buffer);
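
  /* Example of the resulting names (an illustrative sketch of the rule
     above): a fract conversion from SQmode ("sq") to DQmode ("dq") -- both
     signed fract modes -- takes the "2" suffix and registers
     "__gnu_fractsqdq2", whereas SImode ("si") to SQmode does not, giving
     "__gnu_fractsisq".  */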
2032 /* Set up library functions unique to ARM. */
2034 static void
2035 arm_init_libfuncs (void)
2037 /* For Linux, we have access to kernel support for atomic operations. */
2038 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2039 init_sync_libfuncs (2 * UNITS_PER_WORD);
2041 /* There are no special library functions unless we are using the
2042 ARM BPABI. */
2043 if (!TARGET_BPABI)
2044 return;
2046 /* The functions below are described in Section 4 of the "Run-Time
2047 ABI for the ARM architecture", Version 1.0. */
2049 /* Double-precision floating-point arithmetic. Table 2. */
2050 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2051 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2052 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2053 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2054 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2056 /* Double-precision comparisons. Table 3. */
2057 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2058 set_optab_libfunc (ne_optab, DFmode, NULL);
2059 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2060 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2061 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2062 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2063 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2065 /* Single-precision floating-point arithmetic. Table 4. */
2066 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2067 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2068 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2069 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2070 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2072 /* Single-precision comparisons. Table 5. */
2073 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2074 set_optab_libfunc (ne_optab, SFmode, NULL);
2075 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2076 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2077 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2078 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2079 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2081 /* Floating-point to integer conversions. Table 6. */
2082 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2083 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2084 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2085 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2086 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2087 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2088 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2089 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2091 /* Conversions between floating types. Table 7. */
2092 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2093 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2095 /* Integer to floating-point conversions. Table 8. */
2096 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2097 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2098 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2099 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2100 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2101 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2102 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2103 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2105 /* Long long. Table 9. */
2106 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2107 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2108 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2109 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2110 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2111 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2112 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2113 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2115 /* Integer (32/32->32) division. \S 4.3.1. */
2116 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2117 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2119 /* The divmod functions are designed so that they can be used for
2120 plain division, even though they return both the quotient and the
2121 remainder. The quotient is returned in the usual location (i.e.,
2122 r0 for SImode, {r0, r1} for DImode), just as would be expected
2123 for an ordinary division routine. Because the AAPCS calling
2124 conventions specify that all of { r0, r1, r2, r3 } are
2125 call-clobbered registers, there is no need to tell the compiler
2126 explicitly that those registers are clobbered by these
2127 routines. */
2128 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2129 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2131 /* For SImode division the ABI provides div-without-mod routines,
2132 which are faster. */
2133 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2134 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2136 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2137 divmod libcalls instead. */
2138 set_optab_libfunc (smod_optab, DImode, NULL);
2139 set_optab_libfunc (umod_optab, DImode, NULL);
2140 set_optab_libfunc (smod_optab, SImode, NULL);
2141 set_optab_libfunc (umod_optab, SImode, NULL);
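
  /* Illustrative effect of the bindings above (not part of the setup
     itself): a plain C remainder such as

         int rem (int a, int b) { return a % b; }

     expands to a single call to __aeabi_idivmod; the quotient comes back
     in r0 and the remainder in r1, so only a register move is needed to
     return the remainder.  */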
2143 /* Half-precision float operations. The compiler handles all operations
2144 with NULL libfuncs by converting to SFmode. */
2145 switch (arm_fp16_format)
2147 case ARM_FP16_FORMAT_IEEE:
2148 case ARM_FP16_FORMAT_ALTERNATIVE:
2150 /* Conversions. */
2151 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2152 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2153 ? "__gnu_f2h_ieee"
2154 : "__gnu_f2h_alternative"));
2155 set_conv_libfunc (sext_optab, SFmode, HFmode,
2156 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2157 ? "__gnu_h2f_ieee"
2158 : "__gnu_h2f_alternative"));
2160 /* Arithmetic. */
2161 set_optab_libfunc (add_optab, HFmode, NULL);
2162 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2163 set_optab_libfunc (smul_optab, HFmode, NULL);
2164 set_optab_libfunc (neg_optab, HFmode, NULL);
2165 set_optab_libfunc (sub_optab, HFmode, NULL);
2167 /* Comparisons. */
2168 set_optab_libfunc (eq_optab, HFmode, NULL);
2169 set_optab_libfunc (ne_optab, HFmode, NULL);
2170 set_optab_libfunc (lt_optab, HFmode, NULL);
2171 set_optab_libfunc (le_optab, HFmode, NULL);
2172 set_optab_libfunc (ge_optab, HFmode, NULL);
2173 set_optab_libfunc (gt_optab, HFmode, NULL);
2174 set_optab_libfunc (unord_optab, HFmode, NULL);
2175 break;
2177 default:
2178 break;
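
  /* A rough sketch of what the NULL entries above imply: for

         __fp16 a, b;  float f = a + b;

     the operands are widened to SFmode (through __gnu_h2f_* when no
     hardware conversion is available), the addition is done in SFmode,
     and a result stored back into an __fp16 object would be narrowed
     again through __gnu_f2h_*.  */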
2181 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2183 const arm_fixed_mode_set fixed_arith_modes[] =
2185 { QQmode, "qq" },
2186 { UQQmode, "uqq" },
2187 { HQmode, "hq" },
2188 { UHQmode, "uhq" },
2189 { SQmode, "sq" },
2190 { USQmode, "usq" },
2191 { DQmode, "dq" },
2192 { UDQmode, "udq" },
2193 { TQmode, "tq" },
2194 { UTQmode, "utq" },
2195 { HAmode, "ha" },
2196 { UHAmode, "uha" },
2197 { SAmode, "sa" },
2198 { USAmode, "usa" },
2199 { DAmode, "da" },
2200 { UDAmode, "uda" },
2201 { TAmode, "ta" },
2202 { UTAmode, "uta" }
2204 const arm_fixed_mode_set fixed_conv_modes[] =
2206 { QQmode, "qq" },
2207 { UQQmode, "uqq" },
2208 { HQmode, "hq" },
2209 { UHQmode, "uhq" },
2210 { SQmode, "sq" },
2211 { USQmode, "usq" },
2212 { DQmode, "dq" },
2213 { UDQmode, "udq" },
2214 { TQmode, "tq" },
2215 { UTQmode, "utq" },
2216 { HAmode, "ha" },
2217 { UHAmode, "uha" },
2218 { SAmode, "sa" },
2219 { USAmode, "usa" },
2220 { DAmode, "da" },
2221 { UDAmode, "uda" },
2222 { TAmode, "ta" },
2223 { UTAmode, "uta" },
2224 { QImode, "qi" },
2225 { HImode, "hi" },
2226 { SImode, "si" },
2227 { DImode, "di" },
2228 { TImode, "ti" },
2229 { SFmode, "sf" },
2230 { DFmode, "df" }
2232 unsigned int i, j;
2234 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2236 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2237 "add", fixed_arith_modes[i].name, 3);
2238 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2239 "ssadd", fixed_arith_modes[i].name, 3);
2240 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2241 "usadd", fixed_arith_modes[i].name, 3);
2242 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2243 "sub", fixed_arith_modes[i].name, 3);
2244 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2245 "sssub", fixed_arith_modes[i].name, 3);
2246 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2247 "ussub", fixed_arith_modes[i].name, 3);
2248 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2249 "mul", fixed_arith_modes[i].name, 3);
2250 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2251 "ssmul", fixed_arith_modes[i].name, 3);
2252 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2253 "usmul", fixed_arith_modes[i].name, 3);
2254 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2255 "div", fixed_arith_modes[i].name, 3);
2256 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2257 "udiv", fixed_arith_modes[i].name, 3);
2258 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2259 "ssdiv", fixed_arith_modes[i].name, 3);
2260 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2261 "usdiv", fixed_arith_modes[i].name, 3);
2262 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2263 "neg", fixed_arith_modes[i].name, 2);
2264 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2265 "ssneg", fixed_arith_modes[i].name, 2);
2266 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2267 "usneg", fixed_arith_modes[i].name, 2);
2268 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2269 "ashl", fixed_arith_modes[i].name, 3);
2270 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2271 "ashr", fixed_arith_modes[i].name, 3);
2272 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2273 "lshr", fixed_arith_modes[i].name, 3);
2274 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2275 "ssashl", fixed_arith_modes[i].name, 3);
2276 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2277 "usashl", fixed_arith_modes[i].name, 3);
2278 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2279 "cmp", fixed_arith_modes[i].name, 2);
2282 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2283 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2285 if (i == j
2286 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2287 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2288 continue;
2290 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2291 fixed_conv_modes[j].mode, "fract",
2292 fixed_conv_modes[i].name,
2293 fixed_conv_modes[j].name);
2294 arm_set_fixed_conv_libfunc (satfract_optab,
2295 fixed_conv_modes[i].mode,
2296 fixed_conv_modes[j].mode, "satfract",
2297 fixed_conv_modes[i].name,
2298 fixed_conv_modes[j].name);
2299 arm_set_fixed_conv_libfunc (fractuns_optab,
2300 fixed_conv_modes[i].mode,
2301 fixed_conv_modes[j].mode, "fractuns",
2302 fixed_conv_modes[i].name,
2303 fixed_conv_modes[j].name);
2304 arm_set_fixed_conv_libfunc (satfractuns_optab,
2305 fixed_conv_modes[i].mode,
2306 fixed_conv_modes[j].mode, "satfractuns",
2307 fixed_conv_modes[i].name,
2308 fixed_conv_modes[j].name);
2312 if (TARGET_AAPCS_BASED)
2313 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2316 /* On AAPCS systems, this is the "struct __va_list". */
2317 static GTY(()) tree va_list_type;
2319 /* Return the type to use as __builtin_va_list. */
2320 static tree
2321 arm_build_builtin_va_list (void)
2323 tree va_list_name;
2324 tree ap_field;
2326 if (!TARGET_AAPCS_BASED)
2327 return std_build_builtin_va_list ();
2329 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2330 defined as:
2332 struct __va_list
2334 void *__ap;
2337 The C Library ABI further reinforces this definition in \S
2338 4.1.
2340 We must follow this definition exactly. The structure tag
2341 name is visible in C++ mangled names, and thus forms a part
2342 of the ABI. The field name may be used by people who
2343 #include <stdarg.h>. */
2344 /* Create the type. */
2345 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2346 /* Give it the required name. */
2347 va_list_name = build_decl (BUILTINS_LOCATION,
2348 TYPE_DECL,
2349 get_identifier ("__va_list"),
2350 va_list_type);
2351 DECL_ARTIFICIAL (va_list_name) = 1;
2352 TYPE_NAME (va_list_type) = va_list_name;
2353 TYPE_STUB_DECL (va_list_type) = va_list_name;
2354 /* Create the __ap field. */
2355 ap_field = build_decl (BUILTINS_LOCATION,
2356 FIELD_DECL,
2357 get_identifier ("__ap"),
2358 ptr_type_node);
2359 DECL_ARTIFICIAL (ap_field) = 1;
2360 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2361 TYPE_FIELDS (va_list_type) = ap_field;
2362 /* Compute its layout. */
2363 layout_type (va_list_type);
2365 return va_list_type;
2368 /* Return an expression of type "void *" pointing to the next
2369 available argument in a variable-argument list. VALIST is the
2370 user-level va_list object, of type __builtin_va_list. */
2371 static tree
2372 arm_extract_valist_ptr (tree valist)
2374 if (TREE_TYPE (valist) == error_mark_node)
2375 return error_mark_node;
2377 /* On an AAPCS target, the pointer is stored within "struct
2378 va_list". */
2379 if (TARGET_AAPCS_BASED)
2381 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2382 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2383 valist, ap_field, NULL_TREE);
2386 return valist;
2389 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2390 static void
2391 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2393 valist = arm_extract_valist_ptr (valist);
2394 std_expand_builtin_va_start (valist, nextarg);
2397 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2398 static tree
2399 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2400 gimple_seq *post_p)
2402 valist = arm_extract_valist_ptr (valist);
2403 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2406 /* Fix up any incompatible options that the user has specified. */
2407 static void
2408 arm_option_override (void)
2410 if (global_options_set.x_arm_arch_option)
2411 arm_selected_arch = &all_architectures[arm_arch_option];
2413 if (global_options_set.x_arm_cpu_option)
2415 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2416 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2419 if (global_options_set.x_arm_tune_option)
2420 arm_selected_tune = &all_cores[(int) arm_tune_option];
2422 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2423 SUBTARGET_OVERRIDE_OPTIONS;
2424 #endif
2426 if (arm_selected_arch)
2428 if (arm_selected_cpu)
2430 /* Check for conflict between mcpu and march. */
2431 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2433 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2434 arm_selected_cpu->name, arm_selected_arch->name);
2435 /* -march wins for code generation.
2436 -mcpu wins for default tuning. */
2437 if (!arm_selected_tune)
2438 arm_selected_tune = arm_selected_cpu;
2440 arm_selected_cpu = arm_selected_arch;
2442 else
2443 /* -mcpu wins. */
2444 arm_selected_arch = NULL;
2446 else
2447 /* Pick a CPU based on the architecture. */
2448 arm_selected_cpu = arm_selected_arch;
2451 /* If the user did not specify a processor, choose one for them. */
2452 if (!arm_selected_cpu)
2454 const struct processors * sel;
2455 unsigned int sought;
2457 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2458 if (!arm_selected_cpu->name)
2460 #ifdef SUBTARGET_CPU_DEFAULT
2461 /* Use the subtarget default CPU if none was specified by
2462 configure. */
2463 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2464 #endif
2465 /* Default to ARM6. */
2466 if (!arm_selected_cpu->name)
2467 arm_selected_cpu = &all_cores[arm6];
2470 sel = arm_selected_cpu;
2471 insn_flags = sel->flags;
2473 /* Now check to see if the user has specified some command line
2474 switches that require certain abilities from the cpu. */
2475 sought = 0;
2477 if (TARGET_INTERWORK || TARGET_THUMB)
2479 sought |= (FL_THUMB | FL_MODE32);
2481 /* There are no ARM processors that support both APCS-26 and
2482 interworking. Therefore we force FL_MODE26 to be removed
2483 from insn_flags here (if it was set), so that the search
2484 below will always be able to find a compatible processor. */
2485 insn_flags &= ~FL_MODE26;
2488 if (sought != 0 && ((sought & insn_flags) != sought))
2490 /* Try to locate a CPU type that supports all of the abilities
2491 of the default CPU, plus the extra abilities requested by
2492 the user. */
2493 for (sel = all_cores; sel->name != NULL; sel++)
2494 if ((sel->flags & sought) == (sought | insn_flags))
2495 break;
2497 if (sel->name == NULL)
2499 unsigned current_bit_count = 0;
2500 const struct processors * best_fit = NULL;
2502 /* Ideally we would like to issue an error message here
2503 saying that it was not possible to find a CPU compatible
2504 with the default CPU, but which also supports the command
2505 line options specified by the programmer, and so they
2506 ought to use the -mcpu=<name> command line option to
2507 override the default CPU type.
2509 If we cannot find a cpu that has both the
2510 characteristics of the default cpu and the given
2511 command line options we scan the array again looking
2512 for a best match. */
2513 for (sel = all_cores; sel->name != NULL; sel++)
2514 if ((sel->flags & sought) == sought)
2516 unsigned count;
2518 count = bit_count (sel->flags & insn_flags);
2520 if (count >= current_bit_count)
2522 best_fit = sel;
2523 current_bit_count = count;
2527 gcc_assert (best_fit);
2528 sel = best_fit;
2531 arm_selected_cpu = sel;
2535 gcc_assert (arm_selected_cpu);
2536 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2537 if (!arm_selected_tune)
2538 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2540 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
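  /* For instance, with an ARMv7-A cpu selected the arch field is "7A" and
     the macro becomes "__ARM_ARCH_7A__".  */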
2541 insn_flags = arm_selected_cpu->flags;
2542 arm_base_arch = arm_selected_cpu->base_arch;
2544 arm_tune = arm_selected_tune->core;
2545 tune_flags = arm_selected_tune->flags;
2546 current_tune = arm_selected_tune->tune;
2548 /* Make sure that the processor choice does not conflict with any of the
2549 other command line choices. */
2550 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2551 error ("target CPU does not support ARM mode");
2553 /* BPABI targets use linker tricks to allow interworking on cores
2554 without thumb support. */
2555 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2557 warning (0, "target CPU does not support interworking" );
2558 target_flags &= ~MASK_INTERWORK;
2561 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2563 warning (0, "target CPU does not support THUMB instructions");
2564 target_flags &= ~MASK_THUMB;
2567 if (TARGET_APCS_FRAME && TARGET_THUMB)
2569 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2570 target_flags &= ~MASK_APCS_FRAME;
2573 /* Callee super interworking implies thumb interworking. Adding
2574 this to the flags here simplifies the logic elsewhere. */
2575 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2576 target_flags |= MASK_INTERWORK;
2578 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2579 from here where no function is being compiled currently. */
2580 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2581 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2583 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2584 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2586 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2588 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2589 target_flags |= MASK_APCS_FRAME;
2592 if (TARGET_POKE_FUNCTION_NAME)
2593 target_flags |= MASK_APCS_FRAME;
2595 if (TARGET_APCS_REENT && flag_pic)
2596 error ("-fpic and -mapcs-reent are incompatible");
2598 if (TARGET_APCS_REENT)
2599 warning (0, "APCS reentrant code not supported. Ignored");
2601 /* If this target is normally configured to use APCS frames, warn if they
2602 are turned off and debugging is turned on. */
2603 if (TARGET_ARM
2604 && write_symbols != NO_DEBUG
2605 && !TARGET_APCS_FRAME
2606 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2607 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2609 if (TARGET_APCS_FLOAT)
2610 warning (0, "passing floating point arguments in fp regs not yet supported");
2612 if (TARGET_LITTLE_WORDS)
2613 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2614 "will be removed in a future release");
2616 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2617 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2618 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2619 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2620 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2621 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2622 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2623 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2624 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2625 arm_arch6m = arm_arch6 && !arm_arch_notm;
2626 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2627 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2628 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2629 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2630 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2632 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2633 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2634 thumb_code = TARGET_ARM == 0;
2635 thumb1_code = TARGET_THUMB1 != 0;
2636 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2637 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2638 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2639 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2640 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2641 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2642 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2643 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2644 if (arm_restrict_it == 2)
2645 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2647 if (!TARGET_THUMB2)
2648 arm_restrict_it = 0;
2650 /* If we are not using the default (ARM mode) section anchor offset
2651 ranges, then set the correct ranges now. */
2652 if (TARGET_THUMB1)
2654 /* Thumb-1 LDR instructions cannot have negative offsets.
2655 Permissible positive offset ranges are 5-bit (for byte loads),
2656 6-bit (for halfword loads), or 7-bit (for word loads).
2657 Empirical results suggest a 7-bit anchor range gives the best
2658 overall code size. */
2659 targetm.min_anchor_offset = 0;
2660 targetm.max_anchor_offset = 127;
2662 else if (TARGET_THUMB2)
2664 /* The minimum is set such that the total size of the block
2665 for a particular anchor is 248 + 1 + 4095 bytes, which is
2666 divisible by eight, ensuring natural spacing of anchors. */
2667 targetm.min_anchor_offset = -248;
2668 targetm.max_anchor_offset = 4095;
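      /* Arithmetic behind the figures used here and above: the Thumb-1
	 word-load range comes from a 5-bit immediate scaled by 4 (byte
	 offsets 0..124, hence the 7-bit limit of 127), and the Thumb-2
	 block size is 248 + 1 + 4095 = 4344 bytes, which is indeed a
	 multiple of eight.  */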
2671 /* V5 code we generate is completely interworking capable, so we turn off
2672 TARGET_INTERWORK here to avoid many tests later on. */
2674 /* XXX However, we must pass the right pre-processor defines to CPP
2675 or GLD can get confused. This is a hack. */
2676 if (TARGET_INTERWORK)
2677 arm_cpp_interwork = 1;
2679 if (arm_arch5)
2680 target_flags &= ~MASK_INTERWORK;
2682 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2683 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2685 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2686 error ("iwmmxt abi requires an iwmmxt capable cpu");
2688 if (!global_options_set.x_arm_fpu_index)
2690 const char *target_fpu_name;
2691 bool ok;
2693 #ifdef FPUTYPE_DEFAULT
2694 target_fpu_name = FPUTYPE_DEFAULT;
2695 #else
2696 target_fpu_name = "vfp";
2697 #endif
2699 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2700 CL_TARGET);
2701 gcc_assert (ok);
2704 arm_fpu_desc = &all_fpus[arm_fpu_index];
2706 switch (arm_fpu_desc->model)
2708 case ARM_FP_MODEL_VFP:
2709 arm_fpu_attr = FPU_VFP;
2710 break;
2712 default:
2713 gcc_unreachable();
2716 if (TARGET_AAPCS_BASED)
2718 if (TARGET_CALLER_INTERWORKING)
2719 error ("AAPCS does not support -mcaller-super-interworking");
2720 else
2721 if (TARGET_CALLEE_INTERWORKING)
2722 error ("AAPCS does not support -mcallee-super-interworking");
2725 /* iWMMXt and NEON are incompatible. */
2726 if (TARGET_IWMMXT && TARGET_NEON)
2727 error ("iWMMXt and NEON are incompatible");
2729 /* iWMMXt unsupported under Thumb mode. */
2730 if (TARGET_THUMB && TARGET_IWMMXT)
2731 error ("iWMMXt unsupported under Thumb mode");
2733 /* __fp16 support currently assumes the core has ldrh. */
2734 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2735 sorry ("__fp16 and no ldrh");
2737 /* If soft-float is specified then don't use FPU. */
2738 if (TARGET_SOFT_FLOAT)
2739 arm_fpu_attr = FPU_NONE;
2741 if (TARGET_AAPCS_BASED)
2743 if (arm_abi == ARM_ABI_IWMMXT)
2744 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2745 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2746 && TARGET_HARD_FLOAT
2747 && TARGET_VFP)
2748 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2749 else
2750 arm_pcs_default = ARM_PCS_AAPCS;
2752 else
2754 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2755 sorry ("-mfloat-abi=hard and VFP");
2757 if (arm_abi == ARM_ABI_APCS)
2758 arm_pcs_default = ARM_PCS_APCS;
2759 else
2760 arm_pcs_default = ARM_PCS_ATPCS;
2763 /* For arm2/3 there is no need to do any scheduling if we are doing
2764 software floating-point. */
2765 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2766 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2768 /* Use the cp15 method if it is available. */
2769 if (target_thread_pointer == TP_AUTO)
2771 if (arm_arch6k && !TARGET_THUMB1)
2772 target_thread_pointer = TP_CP15;
2773 else
2774 target_thread_pointer = TP_SOFT;
2777 if (TARGET_HARD_TP && TARGET_THUMB1)
2778 error ("can not use -mtp=cp15 with 16-bit Thumb");
2780 /* Override the default structure alignment for AAPCS ABI. */
2781 if (!global_options_set.x_arm_structure_size_boundary)
2783 if (TARGET_AAPCS_BASED)
2784 arm_structure_size_boundary = 8;
2786 else
2788 if (arm_structure_size_boundary != 8
2789 && arm_structure_size_boundary != 32
2790 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2792 if (ARM_DOUBLEWORD_ALIGN)
2793 warning (0,
2794 "structure size boundary can only be set to 8, 32 or 64");
2795 else
2796 warning (0, "structure size boundary can only be set to 8 or 32");
2797 arm_structure_size_boundary
2798 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2802 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2804 error ("RTP PIC is incompatible with Thumb");
2805 flag_pic = 0;
2808 /* If stack checking is disabled, we can use r10 as the PIC register,
2809 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2810 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2812 if (TARGET_VXWORKS_RTP)
2813 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2814 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2817 if (flag_pic && TARGET_VXWORKS_RTP)
2818 arm_pic_register = 9;
2820 if (arm_pic_register_string != NULL)
2822 int pic_register = decode_reg_name (arm_pic_register_string);
2824 if (!flag_pic)
2825 warning (0, "-mpic-register= is useless without -fpic");
2827 /* Prevent the user from choosing an obviously stupid PIC register. */
2828 else if (pic_register < 0 || call_used_regs[pic_register]
2829 || pic_register == HARD_FRAME_POINTER_REGNUM
2830 || pic_register == STACK_POINTER_REGNUM
2831 || pic_register >= PC_REGNUM
2832 || (TARGET_VXWORKS_RTP
2833 && (unsigned int) pic_register != arm_pic_register))
2834 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2835 else
2836 arm_pic_register = pic_register;
2839 if (TARGET_VXWORKS_RTP
2840 && !global_options_set.x_arm_pic_data_is_text_relative)
2841 arm_pic_data_is_text_relative = 0;
2843 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2844 if (fix_cm3_ldrd == 2)
2846 if (arm_selected_cpu->core == cortexm3)
2847 fix_cm3_ldrd = 1;
2848 else
2849 fix_cm3_ldrd = 0;
2852 /* Enable -munaligned-access by default for
2853 - all ARMv6 architecture-based processors
2854 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2855 - ARMv8 architecture-based processors.
2857 Disable -munaligned-access by default for
2858 - all pre-ARMv6 architecture-based processors
2859 - ARMv6-M architecture-based processors. */
2861 if (unaligned_access == 2)
2863 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2864 unaligned_access = 1;
2865 else
2866 unaligned_access = 0;
2868 else if (unaligned_access == 1
2869 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2871 warning (0, "target CPU does not support unaligned accesses");
2872 unaligned_access = 0;
2875 if (TARGET_THUMB1 && flag_schedule_insns)
2877 /* Don't warn since it's on by default in -O2. */
2878 flag_schedule_insns = 0;
2881 if (optimize_size)
2883 /* If optimizing for size, bump the number of instructions that we
2884 are prepared to conditionally execute (even on a StrongARM). */
2885 max_insns_skipped = 6;
2887 else
2888 max_insns_skipped = current_tune->max_insns_skipped;
2890 /* Hot/Cold partitioning is not currently supported, since we can't
2891 handle literal pool placement in that case. */
2892 if (flag_reorder_blocks_and_partition)
2894 inform (input_location,
2895 "-freorder-blocks-and-partition not supported on this architecture");
2896 flag_reorder_blocks_and_partition = 0;
2897 flag_reorder_blocks = 1;
2900 if (flag_pic)
2901 /* Hoisting PIC address calculations more aggressively provides a small,
2902 but measurable, size reduction for PIC code. Therefore, we decrease
2903 the bar for unrestricted expression hoisting to the cost of PIC address
2904 calculation, which is 2 instructions. */
2905 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2906 global_options.x_param_values,
2907 global_options_set.x_param_values);
2909 /* ARM EABI defaults to strict volatile bitfields. */
2910 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2911 && abi_version_at_least(2))
2912 flag_strict_volatile_bitfields = 1;
2914 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and for which we
2915 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2916 if (flag_prefetch_loop_arrays < 0
2917 && HAVE_prefetch
2918 && optimize >= 3
2919 && current_tune->num_prefetch_slots > 0)
2920 flag_prefetch_loop_arrays = 1;
2922 /* Set up parameters to be used in prefetching algorithm. Do not override the
2923 defaults unless we are tuning for a core we have researched values for. */
2924 if (current_tune->num_prefetch_slots > 0)
2925 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2926 current_tune->num_prefetch_slots,
2927 global_options.x_param_values,
2928 global_options_set.x_param_values);
2929 if (current_tune->l1_cache_line_size >= 0)
2930 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2931 current_tune->l1_cache_line_size,
2932 global_options.x_param_values,
2933 global_options_set.x_param_values);
2934 if (current_tune->l1_cache_size >= 0)
2935 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2936 current_tune->l1_cache_size,
2937 global_options.x_param_values,
2938 global_options_set.x_param_values);
2940 /* Use Neon to perform 64-bit operations rather than core
2941 registers. */
2942 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2943 if (use_neon_for_64bits == 1)
2944 prefer_neon_for_64bits = true;
2946 /* Use the alternative scheduling-pressure algorithm by default. */
2947 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2948 global_options.x_param_values,
2949 global_options_set.x_param_values);
2951 /* Disable shrink-wrap when optimizing function for size, since it tends to
2952 generate additional returns. */
2953 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2954 flag_shrink_wrap = false;
2955 /* TBD: Dwarf info for apcs frame is not handled yet. */
2956 if (TARGET_APCS_FRAME)
2957 flag_shrink_wrap = false;
2959 /* We only support -mslow-flash-data on armv7-m targets. */
2960 if (target_slow_flash_data
2961 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2962 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2963 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2965 /* Currently, for slow flash data, we just disable literal pools. */
2966 if (target_slow_flash_data)
2967 arm_disable_literal_pool = true;
2969 /* Register global variables with the garbage collector. */
2970 arm_add_gc_roots ();
2973 static void
2974 arm_add_gc_roots (void)
2976 gcc_obstack_init(&minipool_obstack);
2977 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2980 /* A table of known ARM exception types.
2981 For use with the interrupt function attribute. */
2983 typedef struct
2985 const char *const arg;
2986 const unsigned long return_value;
2988 isr_attribute_arg;
2990 static const isr_attribute_arg isr_attribute_args [] =
2992 { "IRQ", ARM_FT_ISR },
2993 { "irq", ARM_FT_ISR },
2994 { "FIQ", ARM_FT_FIQ },
2995 { "fiq", ARM_FT_FIQ },
2996 { "ABORT", ARM_FT_ISR },
2997 { "abort", ARM_FT_ISR },
2998 { "ABORT", ARM_FT_ISR },
2999 { "abort", ARM_FT_ISR },
3000 { "UNDEF", ARM_FT_EXCEPTION },
3001 { "undef", ARM_FT_EXCEPTION },
3002 { "SWI", ARM_FT_EXCEPTION },
3003 { "swi", ARM_FT_EXCEPTION },
3004 { NULL, ARM_FT_NORMAL }
3007 /* Returns the (interrupt) function type of the current
3008 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
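/* For example, a handler declared as

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   maps to ARM_FT_ISR via the table above; an unrecognised string yields
   ARM_FT_UNKNOWN.  */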
3010 static unsigned long
3011 arm_isr_value (tree argument)
3013 const isr_attribute_arg * ptr;
3014 const char * arg;
3016 if (!arm_arch_notm)
3017 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3019 /* No argument - default to IRQ. */
3020 if (argument == NULL_TREE)
3021 return ARM_FT_ISR;
3023 /* Get the value of the argument. */
3024 if (TREE_VALUE (argument) == NULL_TREE
3025 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3026 return ARM_FT_UNKNOWN;
3028 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3030 /* Check it against the list of known arguments. */
3031 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3032 if (streq (arg, ptr->arg))
3033 return ptr->return_value;
3035 /* An unrecognized interrupt type. */
3036 return ARM_FT_UNKNOWN;
3039 /* Computes the type of the current function. */
3041 static unsigned long
3042 arm_compute_func_type (void)
3044 unsigned long type = ARM_FT_UNKNOWN;
3045 tree a;
3046 tree attr;
3048 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3050 /* Decide if the current function is volatile. Such functions
3051 never return, and many memory cycles can be saved by not storing
3052 register values that will never be needed again. This optimization
3053 was added to speed up context switching in a kernel application. */
3054 if (optimize > 0
3055 && (TREE_NOTHROW (current_function_decl)
3056 || !(flag_unwind_tables
3057 || (flag_exceptions
3058 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3059 && TREE_THIS_VOLATILE (current_function_decl))
3060 type |= ARM_FT_VOLATILE;
3062 if (cfun->static_chain_decl != NULL)
3063 type |= ARM_FT_NESTED;
3065 attr = DECL_ATTRIBUTES (current_function_decl);
3067 a = lookup_attribute ("naked", attr);
3068 if (a != NULL_TREE)
3069 type |= ARM_FT_NAKED;
3071 a = lookup_attribute ("isr", attr);
3072 if (a == NULL_TREE)
3073 a = lookup_attribute ("interrupt", attr);
3075 if (a == NULL_TREE)
3076 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3077 else
3078 type |= arm_isr_value (TREE_VALUE (a));
3080 return type;
3083 /* Returns the type of the current function. */
3085 unsigned long
3086 arm_current_func_type (void)
3088 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3089 cfun->machine->func_type = arm_compute_func_type ();
3091 return cfun->machine->func_type;
3094 bool
3095 arm_allocate_stack_slots_for_args (void)
3097 /* Naked functions should not allocate stack slots for arguments. */
3098 return !IS_NAKED (arm_current_func_type ());
3101 static bool
3102 arm_warn_func_return (tree decl)
3104 /* Naked functions are implemented entirely in assembly, including the
3105 return sequence, so suppress warnings about this. */
3106 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3110 /* Output assembler code for a block containing the constant parts
3111 of a trampoline, leaving space for the variable parts.
3113 On the ARM, (if r8 is the static chain regnum, and remembering that
3114 referencing pc adds an offset of 8) the trampoline looks like:
3115 ldr r8, [pc, #0]
3116 ldr pc, [pc]
3117 .word static chain value
3118 .word function's address
3119 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
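/* A note on the 16-bit Thumb variant emitted below: Thumb-1 has no way to
   load pc directly from memory outside of POP, so the stub saves {r0, r1},
   loads the static chain and the target address with pc-relative LDRs,
   overwrites the saved r1 slot on the stack with the target address, and
   finishes with pop {r0, pc}.  */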
3121 static void
3122 arm_asm_trampoline_template (FILE *f)
3124 if (TARGET_ARM)
3126 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3127 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3129 else if (TARGET_THUMB2)
3131 /* The Thumb-2 trampoline is similar to the arm implementation.
3132 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3133 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3134 STATIC_CHAIN_REGNUM, PC_REGNUM);
3135 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3137 else
3139 ASM_OUTPUT_ALIGN (f, 2);
3140 fprintf (f, "\t.code\t16\n");
3141 fprintf (f, ".Ltrampoline_start:\n");
3142 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3143 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3144 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3145 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3146 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3147 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3149 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3150 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3153 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3155 static void
3156 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3158 rtx fnaddr, mem, a_tramp;
3160 emit_block_move (m_tramp, assemble_trampoline_template (),
3161 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3163 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3164 emit_move_insn (mem, chain_value);
3166 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3167 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3168 emit_move_insn (mem, fnaddr);
3170 a_tramp = XEXP (m_tramp, 0);
3171 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3172 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3173 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3176 /* Thumb trampolines should be entered in thumb mode, so set
3177 the bottom bit of the address. */
3179 static rtx
3180 arm_trampoline_adjust_address (rtx addr)
3182 if (TARGET_THUMB)
3183 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3184 NULL, 0, OPTAB_LIB_WIDEN);
3185 return addr;
3188 /* Return 1 if it is possible to return using a single instruction.
3189 If SIBLING is non-null, this is a test for a return before a sibling
3190 call. SIBLING is the call insn, so we can examine its register usage. */
3193 use_return_insn (int iscond, rtx sibling)
3195 int regno;
3196 unsigned int func_type;
3197 unsigned long saved_int_regs;
3198 unsigned HOST_WIDE_INT stack_adjust;
3199 arm_stack_offsets *offsets;
3201 /* Never use a return instruction before reload has run. */
3202 if (!reload_completed)
3203 return 0;
3205 func_type = arm_current_func_type ();
3207 /* Naked, volatile and stack alignment functions need special
3208 consideration. */
3209 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3210 return 0;
3212 /* So do interrupt functions that use the frame pointer and Thumb
3213 interrupt functions. */
3214 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3215 return 0;
3217 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3218 && !optimize_function_for_size_p (cfun))
3219 return 0;
3221 offsets = arm_get_frame_offsets ();
3222 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3224 /* As do variadic functions. */
3225 if (crtl->args.pretend_args_size
3226 || cfun->machine->uses_anonymous_args
3227 /* Or if the function calls __builtin_eh_return () */
3228 || crtl->calls_eh_return
3229 /* Or if the function calls alloca */
3230 || cfun->calls_alloca
3231 /* Or if there is a stack adjustment. However, if the stack pointer
3232 is saved on the stack, we can use a pre-incrementing stack load. */
3233 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3234 && stack_adjust == 4)))
3235 return 0;
3237 saved_int_regs = offsets->saved_regs_mask;
3239 /* Unfortunately, the insn
3241 ldmib sp, {..., sp, ...}
3243 triggers a bug on most SA-110 based devices, such that the stack
3244 pointer won't be correctly restored if the instruction takes a
3245 page fault. We work around this problem by popping r3 along with
3246 the other registers, since that is never slower than executing
3247 another instruction.
3249 We test for !arm_arch5 here, because code for any architecture
3250 less than this could potentially be run on one of the buggy
3251 chips. */
3252 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3254 /* Validate that r3 is a call-clobbered register (always true in
3255 the default abi) ... */
3256 if (!call_used_regs[3])
3257 return 0;
3259 /* ... that it isn't being used for a return value ... */
3260 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3261 return 0;
3263 /* ... or for a tail-call argument ... */
3264 if (sibling)
3266 gcc_assert (CALL_P (sibling));
3268 if (find_regno_fusage (sibling, USE, 3))
3269 return 0;
3272 /* ... and that there are no call-saved registers in r0-r2
3273 (always true in the default ABI). */
3274 if (saved_int_regs & 0x7)
3275 return 0;
3278 /* Can't be done if interworking with Thumb, and any registers have been
3279 stacked. */
3280 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3281 return 0;
3283 /* On StrongARM, conditional returns are expensive if they aren't
3284 taken and multiple registers have been stacked. */
3285 if (iscond && arm_tune_strongarm)
3287 /* Conditional return when just the LR is stored is a simple
3288 conditional-load instruction, that's not expensive. */
3289 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3290 return 0;
3292 if (flag_pic
3293 && arm_pic_register != INVALID_REGNUM
3294 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3295 return 0;
3298 /* If there are saved registers but the LR isn't saved, then we need
3299 two instructions for the return. */
3300 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3301 return 0;
3303 /* Can't be done if any of the VFP regs are pushed,
3304 since this also requires an insn. */
3305 if (TARGET_HARD_FLOAT && TARGET_VFP)
3306 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3307 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3308 return 0;
3310 if (TARGET_REALLY_IWMMXT)
3311 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3312 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3313 return 0;
3315 return 1;
3318 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3319 shrink-wrapping if possible. This is the case if we need to emit a
3320 prologue, which we can test by looking at the offsets. */
3321 bool
3322 use_simple_return_p (void)
3324 arm_stack_offsets *offsets;
3326 offsets = arm_get_frame_offsets ();
3327 return offsets->outgoing_args != 0;
3330 /* Return TRUE if int I is a valid immediate ARM constant. */
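/* Worked examples for the checks below: 0x0000ff00 and 0xff000000 are
   valid ARM immediates (an 8-bit value rotated by an even amount);
   0x00ff00ff and 0x01010101 are only valid in Thumb-2, via the replicated
   byte patterns; 0x00000101 is not representable in either encoding.  */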
3333 const_ok_for_arm (HOST_WIDE_INT i)
3335 int lowbit;
3337 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3338 be all zero, or all one. */
3339 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3340 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3341 != ((~(unsigned HOST_WIDE_INT) 0)
3342 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3343 return FALSE;
3345 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3347 /* Fast return for 0 and small values. We must do this for zero, since
3348 the code below can't handle that one case. */
3349 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3350 return TRUE;
3352 /* Get the number of trailing zeros. */
3353 lowbit = ffs((int) i) - 1;
3355 /* Only even shifts are allowed in ARM mode so round down to the
3356 nearest even number. */
3357 if (TARGET_ARM)
3358 lowbit &= ~1;
3360 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3361 return TRUE;
3363 if (TARGET_ARM)
3365 /* Allow rotated constants in ARM mode. */
3366 if (lowbit <= 4
3367 && ((i & ~0xc000003f) == 0
3368 || (i & ~0xf000000f) == 0
3369 || (i & ~0xfc000003) == 0))
3370 return TRUE;
3372 else
3374 HOST_WIDE_INT v;
3376 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3377 v = i & 0xff;
3378 v |= v << 16;
3379 if (i == v || i == (v | (v << 8)))
3380 return TRUE;
3382 /* Allow repeated pattern 0xXY00XY00. */
3383 v = i & 0xff00;
3384 v |= v << 16;
3385 if (i == v)
3386 return TRUE;
3389 return FALSE;
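 /* Illustrative examples (a sketch, assuming TARGET_ARM unless noted):

      const_ok_for_arm (0x000000ff)  -> TRUE   (plain 8-bit immediate)
      const_ok_for_arm (0x00ff0000)  -> TRUE   (0xff rotated up to bits 16..23)
      const_ok_for_arm (0x00000101)  -> FALSE  (set bits span more than 8 bits)

    In Thumb-2 the replicated forms also succeed, e.g. 0x00120012 and
    0x34343434, which ARM mode rejects.  */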
3392 /* Return true if I is a valid constant for the operation CODE. */
3394 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3396 if (const_ok_for_arm (i))
3397 return 1;
3399 switch (code)
3401 case SET:
3402 /* See if we can use movw. */
3403 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3404 return 1;
3405 else
3406 /* Otherwise, try mvn. */
3407 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3409 case PLUS:
3410 /* See if we can use addw or subw. */
3411 if (TARGET_THUMB2
3412 && ((i & 0xfffff000) == 0
3413 || ((-i) & 0xfffff000) == 0))
3414 return 1;
3415 /* else fall through. */
3417 case COMPARE:
3418 case EQ:
3419 case NE:
3420 case GT:
3421 case LE:
3422 case LT:
3423 case GE:
3424 case GEU:
3425 case LTU:
3426 case GTU:
3427 case LEU:
3428 case UNORDERED:
3429 case ORDERED:
3430 case UNEQ:
3431 case UNGE:
3432 case UNLT:
3433 case UNGT:
3434 case UNLE:
3435 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3437 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3438 case XOR:
3439 return 0;
3441 case IOR:
3442 if (TARGET_THUMB2)
3443 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3444 return 0;
3446 case AND:
3447 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3449 default:
3450 gcc_unreachable ();
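 /* Illustrative examples: const_ok_for_op (0xffffff00, AND) is true even
    though 0xffffff00 is not itself a valid immediate, because its
    complement 0xff is (the operation can be emitted as a BIC).  Likewise
    const_ok_for_op (-1020, PLUS) is true because the negated value 1020
    (0xff << 2) is a valid immediate, so the ADD can become a SUB.  */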
3454 /* Return true if I is a valid di mode constant for the operation CODE. */
3456 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3458 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3459 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3460 rtx hi = GEN_INT (hi_val);
3461 rtx lo = GEN_INT (lo_val);
3463 if (TARGET_THUMB1)
3464 return 0;
3466 switch (code)
3468 case AND:
3469 case IOR:
3470 case XOR:
3471 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3472 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3473 case PLUS:
3474 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3476 default:
3477 return 0;
3481 /* Emit a sequence of insns to handle a large constant.
3482 CODE is the code of the operation required, it can be any of SET, PLUS,
3483 IOR, AND, XOR, MINUS;
3484 MODE is the mode in which the operation is being performed;
3485 VAL is the integer to operate on;
3486 SOURCE is the other operand (a register, or a null-pointer for SET);
3487 SUBTARGETS means it is safe to create scratch registers if that will
3488 either produce a simpler sequence, or we will want to cse the values.
3489 Return value is the number of insns emitted. */
3491 /* ??? Tweak this for thumb2. */
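 /* Usage sketch (illustrative only; INSN and the register rtxes are
    placeholders):

      arm_split_constant (PLUS, SImode, insn, 0x12340000, r0, r1, 1);

    for "r0 = r1 + 0x12340000" typically emits two ADDs of the rotated
    immediates 0x12000000 and 0x00340000 and returns 2.  */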
3493 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3494 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3496 rtx cond;
3498 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3499 cond = COND_EXEC_TEST (PATTERN (insn));
3500 else
3501 cond = NULL_RTX;
3503 if (subtargets || code == SET
3504 || (REG_P (target) && REG_P (source)
3505 && REGNO (target) != REGNO (source)))
3507 /* After arm_reorg has been called, we can't fix up expensive
3508 constants by pushing them into memory so we must synthesize
3509 them in-line, regardless of the cost. This is only likely to
3510 be more costly on chips that have load delay slots and we are
3511 compiling without running the scheduler (so no splitting
3512 occurred before the final instruction emission).
3514 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3516 if (!cfun->machine->after_arm_reorg
3517 && !cond
3518 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3519 1, 0)
3520 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3521 + (code != SET))))
3523 if (code == SET)
3525 /* Currently SET is the only monadic value for CODE; all
3526 the rest are dyadic. */
3527 if (TARGET_USE_MOVT)
3528 arm_emit_movpair (target, GEN_INT (val));
3529 else
3530 emit_set_insn (target, GEN_INT (val));
3532 return 1;
3534 else
3536 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3538 if (TARGET_USE_MOVT)
3539 arm_emit_movpair (temp, GEN_INT (val));
3540 else
3541 emit_set_insn (temp, GEN_INT (val));
3543 /* For MINUS, the value is subtracted from, since we never
3544 have subtraction of a constant. */
3545 if (code == MINUS)
3546 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3547 else
3548 emit_set_insn (target,
3549 gen_rtx_fmt_ee (code, mode, source, temp));
3550 return 2;
3555 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3559 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3560 ARM/THUMB2 immediates, and add up to VAL.
3561 The function return value gives the number of insns required. */
3562 static int
3563 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3564 struct four_ints *return_sequence)
3566 int best_consecutive_zeros = 0;
3567 int i;
3568 int best_start = 0;
3569 int insns1, insns2;
3570 struct four_ints tmp_sequence;
3572 /* If we aren't targeting ARM, the best place to start is always at
3573 the bottom; otherwise look more closely. */
3574 if (TARGET_ARM)
3576 for (i = 0; i < 32; i += 2)
3578 int consecutive_zeros = 0;
3580 if (!(val & (3 << i)))
3582 while ((i < 32) && !(val & (3 << i)))
3584 consecutive_zeros += 2;
3585 i += 2;
3587 if (consecutive_zeros > best_consecutive_zeros)
3589 best_consecutive_zeros = consecutive_zeros;
3590 best_start = i - consecutive_zeros;
3592 i -= 2;
3597 /* So long as it won't require any more insns to do so, it's
3598 desirable to emit a small constant (in bits 0...9) in the last
3599 insn. This way there is more chance that it can be combined with
3600 a later addressing insn to form a pre-indexed load or store
3601 operation. Consider:
3603 *((volatile int *)0xe0000100) = 1;
3604 *((volatile int *)0xe0000110) = 2;
3606 We want this to wind up as:
3608 mov rA, #0xe0000000
3609 mov rB, #1
3610 str rB, [rA, #0x100]
3611 mov rB, #2
3612 str rB, [rA, #0x110]
3614 rather than having to synthesize both large constants from scratch.
3616 Therefore, we calculate how many insns would be required to emit
3617 the constant starting from `best_start', and also starting from
3618 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3619 yield a shorter sequence, we may as well use zero. */
3620 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3621 if (best_start != 0
3622 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3624 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3625 if (insns2 <= insns1)
3627 *return_sequence = tmp_sequence;
3628 insns1 = insns2;
3632 return insns1;
3635 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3636 static int
3637 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3638 struct four_ints *return_sequence, int i)
3640 int remainder = val & 0xffffffff;
3641 int insns = 0;
3643 /* Try and find a way of doing the job in either two or three
3644 instructions.
3646 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3647 location. We start at position I. This may be the MSB, or
3648 optimal_immediate_sequence may have positioned it at the largest block
3649 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3650 wrapping around to the top of the word when we drop off the bottom.
3651 In the worst case this code should produce no more than four insns.
3653 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3654 constants, shifted to any arbitrary location. We should always start
3655 at the MSB. */
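 /* Worked example (illustrative): in ARM mode, VAL = 0x12340000 with
    I = 0 (i.e. effectively starting from the MSB) peels off the rotated
    immediates 0x12000000 and then 0x00340000, so RETURN_SEQUENCE gets two
    entries and the function returns 2.  In Thumb-2 mode, VAL = 0x12341234
    is instead split by the replicated-constant tests below into
    0x12001200 and 0x00340034.  */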
3658 int end;
3659 unsigned int b1, b2, b3, b4;
3660 unsigned HOST_WIDE_INT result;
3661 int loc;
3663 gcc_assert (insns < 4);
3665 if (i <= 0)
3666 i += 32;
3668 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3669 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3671 loc = i;
3672 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3673 /* We can use addw/subw for the last 12 bits. */
3674 result = remainder;
3675 else
3677 /* Use an 8-bit shifted/rotated immediate. */
3678 end = i - 8;
3679 if (end < 0)
3680 end += 32;
3681 result = remainder & ((0x0ff << end)
3682 | ((i < end) ? (0xff >> (32 - end))
3683 : 0));
3684 i -= 8;
3687 else
3689 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3690 arbitrary shifts. */
3691 i -= TARGET_ARM ? 2 : 1;
3692 continue;
3695 /* Next, see if we can do a better job with a thumb2 replicated
3696 constant.
3698 We do it this way around to catch the cases like 0x01F001E0 where
3699 two 8-bit immediates would work, but a replicated constant would
3700 make it worse.
3702 TODO: 16-bit constants that don't clear all the bits, but still win.
3703 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3704 if (TARGET_THUMB2)
3706 b1 = (remainder & 0xff000000) >> 24;
3707 b2 = (remainder & 0x00ff0000) >> 16;
3708 b3 = (remainder & 0x0000ff00) >> 8;
3709 b4 = remainder & 0xff;
3711 if (loc > 24)
3713 /* The 8-bit immediate already found clears b1 (and maybe b2),
3714 but must leave b3 and b4 alone. */
3716 /* First try to find a 32-bit replicated constant that clears
3717 almost everything. We can assume that we can't do it in one,
3718 or else we wouldn't be here. */
3719 unsigned int tmp = b1 & b2 & b3 & b4;
3720 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3721 + (tmp << 24);
3722 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3723 + (tmp == b3) + (tmp == b4);
3724 if (tmp
3725 && (matching_bytes >= 3
3726 || (matching_bytes == 2
3727 && const_ok_for_op (remainder & ~tmp2, code))))
3729 /* At least 3 of the bytes match, and the fourth has at
3730 least as many bits set, or two of the bytes match
3731 and it will only require one more insn to finish. */
3732 result = tmp2;
3733 i = tmp != b1 ? 32
3734 : tmp != b2 ? 24
3735 : tmp != b3 ? 16
3736 : 8;
3739 /* Second, try to find a 16-bit replicated constant that can
3740 leave three of the bytes clear. If b2 or b4 is already
3741 zero, then we can. If the 8-bit from above would not
3742 clear b2 anyway, then we still win. */
3743 else if (b1 == b3 && (!b2 || !b4
3744 || (remainder & 0x00ff0000 & ~result)))
3746 result = remainder & 0xff00ff00;
3747 i = 24;
3750 else if (loc > 16)
3752 /* The 8-bit immediate already found clears b2 (and maybe b3)
3753 and we don't get here unless b1 is already clear, but it will
3754 leave b4 unchanged. */
3756 /* If we can clear b2 and b4 at once, then we win, since the
3757 8-bits couldn't possibly reach that far. */
3758 if (b2 == b4)
3760 result = remainder & 0x00ff00ff;
3761 i = 16;
3766 return_sequence->i[insns++] = result;
3767 remainder &= ~result;
3769 if (code == SET || code == MINUS)
3770 code = PLUS;
3772 while (remainder);
3774 return insns;
3777 /* Emit an instruction with the indicated PATTERN. If COND is
3778 non-NULL, conditionalize the execution of the instruction on COND
3779 being true. */
3781 static void
3782 emit_constant_insn (rtx cond, rtx pattern)
3784 if (cond)
3785 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3786 emit_insn (pattern);
3789 /* As above, but extra parameter GENERATE which, if clear, suppresses
3790 RTL generation. */
3792 static int
3793 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3794 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3795 int generate)
3797 int can_invert = 0;
3798 int can_negate = 0;
3799 int final_invert = 0;
3800 int i;
3801 int set_sign_bit_copies = 0;
3802 int clear_sign_bit_copies = 0;
3803 int clear_zero_bit_copies = 0;
3804 int set_zero_bit_copies = 0;
3805 int insns = 0, neg_insns, inv_insns;
3806 unsigned HOST_WIDE_INT temp1, temp2;
3807 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3808 struct four_ints *immediates;
3809 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3811 /* Find out which operations are safe for a given CODE. Also do a quick
3812 check for degenerate cases; these can occur when DImode operations
3813 are split. */
3814 switch (code)
3816 case SET:
3817 can_invert = 1;
3818 break;
3820 case PLUS:
3821 can_negate = 1;
3822 break;
3824 case IOR:
3825 if (remainder == 0xffffffff)
3827 if (generate)
3828 emit_constant_insn (cond,
3829 gen_rtx_SET (VOIDmode, target,
3830 GEN_INT (ARM_SIGN_EXTEND (val))));
3831 return 1;
3834 if (remainder == 0)
3836 if (reload_completed && rtx_equal_p (target, source))
3837 return 0;
3839 if (generate)
3840 emit_constant_insn (cond,
3841 gen_rtx_SET (VOIDmode, target, source));
3842 return 1;
3844 break;
3846 case AND:
3847 if (remainder == 0)
3849 if (generate)
3850 emit_constant_insn (cond,
3851 gen_rtx_SET (VOIDmode, target, const0_rtx));
3852 return 1;
3854 if (remainder == 0xffffffff)
3856 if (reload_completed && rtx_equal_p (target, source))
3857 return 0;
3858 if (generate)
3859 emit_constant_insn (cond,
3860 gen_rtx_SET (VOIDmode, target, source));
3861 return 1;
3863 can_invert = 1;
3864 break;
3866 case XOR:
3867 if (remainder == 0)
3869 if (reload_completed && rtx_equal_p (target, source))
3870 return 0;
3871 if (generate)
3872 emit_constant_insn (cond,
3873 gen_rtx_SET (VOIDmode, target, source));
3874 return 1;
3877 if (remainder == 0xffffffff)
3879 if (generate)
3880 emit_constant_insn (cond,
3881 gen_rtx_SET (VOIDmode, target,
3882 gen_rtx_NOT (mode, source)));
3883 return 1;
3885 final_invert = 1;
3886 break;
3888 case MINUS:
3889 /* We treat MINUS as (val - source), since (source - val) is always
3890 passed as (source + (-val)). */
3891 if (remainder == 0)
3893 if (generate)
3894 emit_constant_insn (cond,
3895 gen_rtx_SET (VOIDmode, target,
3896 gen_rtx_NEG (mode, source)));
3897 return 1;
3899 if (const_ok_for_arm (val))
3901 if (generate)
3902 emit_constant_insn (cond,
3903 gen_rtx_SET (VOIDmode, target,
3904 gen_rtx_MINUS (mode, GEN_INT (val),
3905 source)));
3906 return 1;
3909 break;
3911 default:
3912 gcc_unreachable ();
3915 /* If we can do it in one insn get out quickly. */
3916 if (const_ok_for_op (val, code))
3918 if (generate)
3919 emit_constant_insn (cond,
3920 gen_rtx_SET (VOIDmode, target,
3921 (source
3922 ? gen_rtx_fmt_ee (code, mode, source,
3923 GEN_INT (val))
3924 : GEN_INT (val))));
3925 return 1;
3928 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3929 insn. */
3930 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3931 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3933 if (generate)
3935 if (mode == SImode && i == 16)
3936 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3937 smaller insn. */
3938 emit_constant_insn (cond,
3939 gen_zero_extendhisi2
3940 (target, gen_lowpart (HImode, source)));
3941 else
3942 /* Extz only supports SImode, but we can coerce the operands
3943 into that mode. */
3944 emit_constant_insn (cond,
3945 gen_extzv_t2 (gen_lowpart (SImode, target),
3946 gen_lowpart (SImode, source),
3947 GEN_INT (i), const0_rtx));
3950 return 1;
3953 /* Calculate a few attributes that may be useful for specific
3954 optimizations. */
3955 /* Count number of leading zeros. */
3956 for (i = 31; i >= 0; i--)
3958 if ((remainder & (1 << i)) == 0)
3959 clear_sign_bit_copies++;
3960 else
3961 break;
3964 /* Count number of leading 1's. */
3965 for (i = 31; i >= 0; i--)
3967 if ((remainder & (1 << i)) != 0)
3968 set_sign_bit_copies++;
3969 else
3970 break;
3973 /* Count number of trailing zeros. */
3974 for (i = 0; i <= 31; i++)
3976 if ((remainder & (1 << i)) == 0)
3977 clear_zero_bit_copies++;
3978 else
3979 break;
3982 /* Count number of trailing 1's. */
3983 for (i = 0; i <= 31; i++)
3985 if ((remainder & (1 << i)) != 0)
3986 set_zero_bit_copies++;
3987 else
3988 break;
3991 switch (code)
3993 case SET:
3994 /* See if we can do this by sign_extending a constant that is known
3995 to be negative. This is a good way of doing it, since the shift
3996 may well merge into a subsequent insn. */
3997 if (set_sign_bit_copies > 1)
3999 if (const_ok_for_arm
4000 (temp1 = ARM_SIGN_EXTEND (remainder
4001 << (set_sign_bit_copies - 1))))
4003 if (generate)
4005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, new_src,
4008 GEN_INT (temp1)));
4009 emit_constant_insn (cond,
4010 gen_ashrsi3 (target, new_src,
4011 GEN_INT (set_sign_bit_copies - 1)));
4013 return 2;
4015 /* For an inverted constant, we will need to set the low bits;
4016 these will be shifted out of harm's way. */
4017 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4018 if (const_ok_for_arm (~temp1))
4020 if (generate)
4022 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4023 emit_constant_insn (cond,
4024 gen_rtx_SET (VOIDmode, new_src,
4025 GEN_INT (temp1)));
4026 emit_constant_insn (cond,
4027 gen_ashrsi3 (target, new_src,
4028 GEN_INT (set_sign_bit_copies - 1)));
4030 return 2;
4034 /* See if we can calculate the value as the difference between two
4035 valid immediates. */
4036 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4038 int topshift = clear_sign_bit_copies & ~1;
4040 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4041 & (0xff000000 >> topshift));
4043 /* If temp1 is zero, then that means the 9 most significant
4044 bits of remainder were 1 and we've caused it to overflow.
4045 When topshift is 0 we don't need to do anything since we
4046 can borrow from 'bit 32'. */
4047 if (temp1 == 0 && topshift != 0)
4048 temp1 = 0x80000000 >> (topshift - 1);
4050 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4052 if (const_ok_for_arm (temp2))
4054 if (generate)
4056 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4057 emit_constant_insn (cond,
4058 gen_rtx_SET (VOIDmode, new_src,
4059 GEN_INT (temp1)));
4060 emit_constant_insn (cond,
4061 gen_addsi3 (target, new_src,
4062 GEN_INT (-temp2)));
4065 return 2;
4069 /* See if we can generate this by setting the bottom (or the top)
4070 16 bits, and then shifting these into the other half of the
4071 word. We only look for the simplest cases, to do more would cost
4072 too much. Be careful, however, not to generate this when the
4073 alternative would take fewer insns. */
4074 if (val & 0xffff0000)
4076 temp1 = remainder & 0xffff0000;
4077 temp2 = remainder & 0x0000ffff;
4079 /* Overlaps outside this range are best done using other methods. */
4080 for (i = 9; i < 24; i++)
4082 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4083 && !const_ok_for_arm (temp2))
4085 rtx new_src = (subtargets
4086 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4087 : target);
4088 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4089 source, subtargets, generate);
4090 source = new_src;
4091 if (generate)
4092 emit_constant_insn
4093 (cond,
4094 gen_rtx_SET
4095 (VOIDmode, target,
4096 gen_rtx_IOR (mode,
4097 gen_rtx_ASHIFT (mode, source,
4098 GEN_INT (i)),
4099 source)));
4100 return insns + 1;
4104 /* Don't duplicate cases already considered. */
4105 for (i = 17; i < 24; i++)
4107 if (((temp1 | (temp1 >> i)) == remainder)
4108 && !const_ok_for_arm (temp1))
4110 rtx new_src = (subtargets
4111 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4112 : target);
4113 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4114 source, subtargets, generate);
4115 source = new_src;
4116 if (generate)
4117 emit_constant_insn
4118 (cond,
4119 gen_rtx_SET (VOIDmode, target,
4120 gen_rtx_IOR
4121 (mode,
4122 gen_rtx_LSHIFTRT (mode, source,
4123 GEN_INT (i)),
4124 source)));
4125 return insns + 1;
4129 break;
4131 case IOR:
4132 case XOR:
4133 /* If we have IOR or XOR, and the constant can be loaded in a
4134 single instruction, and we can find a temporary to put it in,
4135 then this can be done in two instructions instead of 3-4. */
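 /* For instance (illustrative only), "x |= 0xffffff01" can load
    0xffffff01 into a scratch with a single MVN of 0xfe and then ORR it
    in: two insns in total, rather than splitting the constant itself.  */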
4136 if (subtargets
4137 /* TARGET can't be NULL if SUBTARGETS is 0 */
4138 || (reload_completed && !reg_mentioned_p (target, source)))
4140 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4142 if (generate)
4144 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4146 emit_constant_insn (cond,
4147 gen_rtx_SET (VOIDmode, sub,
4148 GEN_INT (val)));
4149 emit_constant_insn (cond,
4150 gen_rtx_SET (VOIDmode, target,
4151 gen_rtx_fmt_ee (code, mode,
4152 source, sub)));
4154 return 2;
4158 if (code == XOR)
4159 break;
4161 /* Convert.
4162 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4163 and the remainder 0s, e.g. 0xfff00000)
4164 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4166 This can be done in 2 instructions by using shifts with mov or mvn.
4167 e.g. for
4168 x = x | 0xfff00000;
4169 we generate.
4170 mvn r0, r0, asl #12
4171 mvn r0, r0, lsr #12 */
4172 if (set_sign_bit_copies > 8
4173 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4175 if (generate)
4177 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4178 rtx shift = GEN_INT (set_sign_bit_copies);
4180 emit_constant_insn
4181 (cond,
4182 gen_rtx_SET (VOIDmode, sub,
4183 gen_rtx_NOT (mode,
4184 gen_rtx_ASHIFT (mode,
4185 source,
4186 shift))));
4187 emit_constant_insn
4188 (cond,
4189 gen_rtx_SET (VOIDmode, target,
4190 gen_rtx_NOT (mode,
4191 gen_rtx_LSHIFTRT (mode, sub,
4192 shift))));
4194 return 2;
4197 /* Convert
4198 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4200 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4202 For example, r0 = r0 | 0xfff
4203 mvn r0, r0, lsr #12
4204 mvn r0, r0, asl #12
4207 if (set_zero_bit_copies > 8
4208 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4210 if (generate)
4212 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4213 rtx shift = GEN_INT (set_zero_bit_copies);
4215 emit_constant_insn
4216 (cond,
4217 gen_rtx_SET (VOIDmode, sub,
4218 gen_rtx_NOT (mode,
4219 gen_rtx_LSHIFTRT (mode,
4220 source,
4221 shift))));
4222 emit_constant_insn
4223 (cond,
4224 gen_rtx_SET (VOIDmode, target,
4225 gen_rtx_NOT (mode,
4226 gen_rtx_ASHIFT (mode, sub,
4227 shift))));
4229 return 2;
4232 /* This will never be reached for Thumb2 because orn is a valid
4233 instruction. This is for Thumb1 and the ARM 32 bit cases.
4235 x = y | constant (such that ~constant is a valid constant)
4236 Transform this to
4237 x = ~(~y & ~constant).
4239 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4241 if (generate)
4243 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4244 emit_constant_insn (cond,
4245 gen_rtx_SET (VOIDmode, sub,
4246 gen_rtx_NOT (mode, source)));
4247 source = sub;
4248 if (subtargets)
4249 sub = gen_reg_rtx (mode);
4250 emit_constant_insn (cond,
4251 gen_rtx_SET (VOIDmode, sub,
4252 gen_rtx_AND (mode, source,
4253 GEN_INT (temp1))));
4254 emit_constant_insn (cond,
4255 gen_rtx_SET (VOIDmode, target,
4256 gen_rtx_NOT (mode, sub)));
4258 return 3;
4260 break;
4262 case AND:
4263 /* See if two shifts will do 2 or more insn's worth of work. */
4264 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4266 HOST_WIDE_INT shift_mask = ((0xffffffff
4267 << (32 - clear_sign_bit_copies))
4268 & 0xffffffff);
4270 if ((remainder | shift_mask) != 0xffffffff)
4272 if (generate)
4274 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4275 insns = arm_gen_constant (AND, mode, cond,
4276 remainder | shift_mask,
4277 new_src, source, subtargets, 1);
4278 source = new_src;
4280 else
4282 rtx targ = subtargets ? NULL_RTX : target;
4283 insns = arm_gen_constant (AND, mode, cond,
4284 remainder | shift_mask,
4285 targ, source, subtargets, 0);
4289 if (generate)
4291 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4292 rtx shift = GEN_INT (clear_sign_bit_copies);
4294 emit_insn (gen_ashlsi3 (new_src, source, shift));
4295 emit_insn (gen_lshrsi3 (target, new_src, shift));
4298 return insns + 2;
4301 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4303 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4305 if ((remainder | shift_mask) != 0xffffffff)
4307 if (generate)
4309 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4311 insns = arm_gen_constant (AND, mode, cond,
4312 remainder | shift_mask,
4313 new_src, source, subtargets, 1);
4314 source = new_src;
4316 else
4318 rtx targ = subtargets ? NULL_RTX : target;
4320 insns = arm_gen_constant (AND, mode, cond,
4321 remainder | shift_mask,
4322 targ, source, subtargets, 0);
4326 if (generate)
4328 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4329 rtx shift = GEN_INT (clear_zero_bit_copies);
4331 emit_insn (gen_lshrsi3 (new_src, source, shift));
4332 emit_insn (gen_ashlsi3 (target, new_src, shift));
4335 return insns + 2;
4338 break;
4340 default:
4341 break;
4344 /* Calculate what the instruction sequences would be if we generated it
4345 normally, negated, or inverted. */
4346 if (code == AND)
4347 /* AND cannot be split into multiple insns, so invert and use BIC. */
4348 insns = 99;
4349 else
4350 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4352 if (can_negate)
4353 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4354 &neg_immediates);
4355 else
4356 neg_insns = 99;
4358 if (can_invert || final_invert)
4359 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4360 &inv_immediates);
4361 else
4362 inv_insns = 99;
4364 immediates = &pos_immediates;
4366 /* Is the negated immediate sequence more efficient? */
4367 if (neg_insns < insns && neg_insns <= inv_insns)
4369 insns = neg_insns;
4370 immediates = &neg_immediates;
4372 else
4373 can_negate = 0;
4375 /* Is the inverted immediate sequence more efficient?
4376 We must allow for an extra NOT instruction for XOR operations, although
4377 there is some chance that the final 'mvn' will get optimized later. */
4378 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4380 insns = inv_insns;
4381 immediates = &inv_immediates;
4383 else
4385 can_invert = 0;
4386 final_invert = 0;
4389 /* Now output the chosen sequence as instructions. */
4390 if (generate)
4392 for (i = 0; i < insns; i++)
4394 rtx new_src, temp1_rtx;
4396 temp1 = immediates->i[i];
4398 if (code == SET || code == MINUS)
4399 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4400 else if ((final_invert || i < (insns - 1)) && subtargets)
4401 new_src = gen_reg_rtx (mode);
4402 else
4403 new_src = target;
4405 if (can_invert)
4406 temp1 = ~temp1;
4407 else if (can_negate)
4408 temp1 = -temp1;
4410 temp1 = trunc_int_for_mode (temp1, mode);
4411 temp1_rtx = GEN_INT (temp1);
4413 if (code == SET)
4415 else if (code == MINUS)
4416 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4417 else
4418 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4420 emit_constant_insn (cond,
4421 gen_rtx_SET (VOIDmode, new_src,
4422 temp1_rtx));
4423 source = new_src;
4425 if (code == SET)
4427 can_negate = can_invert;
4428 can_invert = 0;
4429 code = PLUS;
4431 else if (code == MINUS)
4432 code = PLUS;
4436 if (final_invert)
4438 if (generate)
4439 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4440 gen_rtx_NOT (mode, source)));
4441 insns++;
4444 return insns;
4447 /* Canonicalize a comparison so that we are more likely to recognize it.
4448 This can be done for a few constant compares, where we can make the
4449 immediate value easier to load. */
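 /* Example (illustrative): "x <= 0xfff" cannot use 0xfff directly, since
    it is not a valid immediate, so the code below rewrites it as
    "x < 0x1000", whose constant is a single rotated 8-bit value.  */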
4451 static void
4452 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4453 bool op0_preserve_value)
4455 enum machine_mode mode;
4456 unsigned HOST_WIDE_INT i, maxval;
4458 mode = GET_MODE (*op0);
4459 if (mode == VOIDmode)
4460 mode = GET_MODE (*op1);
4462 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4464 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4465 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4466 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4467 for GTU/LEU in Thumb mode. */
4468 if (mode == DImode)
4470 rtx tem;
4472 if (*code == GT || *code == LE
4473 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4475 /* Missing comparison. First try to use an available
4476 comparison. */
4477 if (CONST_INT_P (*op1))
4479 i = INTVAL (*op1);
4480 switch (*code)
4482 case GT:
4483 case LE:
4484 if (i != maxval
4485 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4487 *op1 = GEN_INT (i + 1);
4488 *code = *code == GT ? GE : LT;
4489 return;
4491 break;
4492 case GTU:
4493 case LEU:
4494 if (i != ~((unsigned HOST_WIDE_INT) 0)
4495 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4497 *op1 = GEN_INT (i + 1);
4498 *code = *code == GTU ? GEU : LTU;
4499 return;
4501 break;
4502 default:
4503 gcc_unreachable ();
4507 /* If that did not work, reverse the condition. */
4508 if (!op0_preserve_value)
4510 tem = *op0;
4511 *op0 = *op1;
4512 *op1 = tem;
4513 *code = (int)swap_condition ((enum rtx_code)*code);
4516 return;
4519 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4520 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4521 to facilitate possible combining with a cmp into 'ands'. */
4522 if (mode == SImode
4523 && GET_CODE (*op0) == ZERO_EXTEND
4524 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4525 && GET_MODE (XEXP (*op0, 0)) == QImode
4526 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4527 && subreg_lowpart_p (XEXP (*op0, 0))
4528 && *op1 == const0_rtx)
4529 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4530 GEN_INT (255));
4532 /* Comparisons smaller than DImode. Only adjust comparisons against
4533 an out-of-range constant. */
4534 if (!CONST_INT_P (*op1)
4535 || const_ok_for_arm (INTVAL (*op1))
4536 || const_ok_for_arm (- INTVAL (*op1)))
4537 return;
4539 i = INTVAL (*op1);
4541 switch (*code)
4543 case EQ:
4544 case NE:
4545 return;
4547 case GT:
4548 case LE:
4549 if (i != maxval
4550 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4552 *op1 = GEN_INT (i + 1);
4553 *code = *code == GT ? GE : LT;
4554 return;
4556 break;
4558 case GE:
4559 case LT:
4560 if (i != ~maxval
4561 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4563 *op1 = GEN_INT (i - 1);
4564 *code = *code == GE ? GT : LE;
4565 return;
4567 break;
4569 case GTU:
4570 case LEU:
4571 if (i != ~((unsigned HOST_WIDE_INT) 0)
4572 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4574 *op1 = GEN_INT (i + 1);
4575 *code = *code == GTU ? GEU : LTU;
4576 return;
4578 break;
4580 case GEU:
4581 case LTU:
4582 if (i != 0
4583 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4585 *op1 = GEN_INT (i - 1);
4586 *code = *code == GEU ? GTU : LEU;
4587 return;
4589 break;
4591 default:
4592 gcc_unreachable ();
4597 /* Define how to find the value returned by a function. */
4599 static rtx
4600 arm_function_value(const_tree type, const_tree func,
4601 bool outgoing ATTRIBUTE_UNUSED)
4603 enum machine_mode mode;
4604 int unsignedp ATTRIBUTE_UNUSED;
4605 rtx r ATTRIBUTE_UNUSED;
4607 mode = TYPE_MODE (type);
4609 if (TARGET_AAPCS_BASED)
4610 return aapcs_allocate_return_reg (mode, type, func);
4612 /* Promote integer types. */
4613 if (INTEGRAL_TYPE_P (type))
4614 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4616 /* Promotes small structs returned in a register to full-word size
4617 for big-endian AAPCS. */
4618 if (arm_return_in_msb (type))
4620 HOST_WIDE_INT size = int_size_in_bytes (type);
4621 if (size % UNITS_PER_WORD != 0)
4623 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4624 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4628 return arm_libcall_value_1 (mode);
4631 /* libcall hashtable helpers. */
4633 struct libcall_hasher : typed_noop_remove <rtx_def>
4635 typedef rtx_def value_type;
4636 typedef rtx_def compare_type;
4637 static inline hashval_t hash (const value_type *);
4638 static inline bool equal (const value_type *, const compare_type *);
4639 static inline void remove (value_type *);
4642 inline bool
4643 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4645 return rtx_equal_p (p1, p2);
4648 inline hashval_t
4649 libcall_hasher::hash (const value_type *p1)
4651 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4654 typedef hash_table <libcall_hasher> libcall_table_type;
4656 static void
4657 add_libcall (libcall_table_type htab, rtx libcall)
4659 *htab.find_slot (libcall, INSERT) = libcall;
4662 static bool
4663 arm_libcall_uses_aapcs_base (const_rtx libcall)
4665 static bool init_done = false;
4666 static libcall_table_type libcall_htab;
4668 if (!init_done)
4670 init_done = true;
4672 libcall_htab.create (31);
4673 add_libcall (libcall_htab,
4674 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4675 add_libcall (libcall_htab,
4676 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4677 add_libcall (libcall_htab,
4678 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4679 add_libcall (libcall_htab,
4680 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4682 add_libcall (libcall_htab,
4683 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4684 add_libcall (libcall_htab,
4685 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4686 add_libcall (libcall_htab,
4687 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4688 add_libcall (libcall_htab,
4689 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4691 add_libcall (libcall_htab,
4692 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4693 add_libcall (libcall_htab,
4694 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4695 add_libcall (libcall_htab,
4696 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4697 add_libcall (libcall_htab,
4698 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4699 add_libcall (libcall_htab,
4700 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4701 add_libcall (libcall_htab,
4702 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4703 add_libcall (libcall_htab,
4704 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4705 add_libcall (libcall_htab,
4706 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4708 /* Values from double-precision helper functions are returned in core
4709 registers if the selected core only supports single-precision
4710 arithmetic, even if we are using the hard-float ABI. The same is
4711 true for single-precision helpers, but we will never be using the
4712 hard-float ABI on a CPU which doesn't support single-precision
4713 operations in hardware. */
4714 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4715 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4716 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4717 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4718 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4719 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4720 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4721 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4725 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4726 SFmode));
4727 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4728 DFmode));
4731 return libcall && libcall_htab.find (libcall) != NULL;
4734 static rtx
4735 arm_libcall_value_1 (enum machine_mode mode)
4737 if (TARGET_AAPCS_BASED)
4738 return aapcs_libcall_value (mode);
4739 else if (TARGET_IWMMXT_ABI
4740 && arm_vector_mode_supported_p (mode))
4741 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4742 else
4743 return gen_rtx_REG (mode, ARG_REGISTER (1));
4746 /* Define how to find the value returned by a library function
4747 assuming the value has mode MODE. */
4749 static rtx
4750 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4752 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4753 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4755 /* The following libcalls return their result in integer registers,
4756 even though they return a floating point value. */
4757 if (arm_libcall_uses_aapcs_base (libcall))
4758 return gen_rtx_REG (mode, ARG_REGISTER(1));
4762 return arm_libcall_value_1 (mode);
4765 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4767 static bool
4768 arm_function_value_regno_p (const unsigned int regno)
4770 if (regno == ARG_REGISTER (1)
4771 || (TARGET_32BIT
4772 && TARGET_AAPCS_BASED
4773 && TARGET_VFP
4774 && TARGET_HARD_FLOAT
4775 && regno == FIRST_VFP_REGNUM)
4776 || (TARGET_IWMMXT_ABI
4777 && regno == FIRST_IWMMXT_REGNUM))
4778 return true;
4780 return false;
4783 /* Determine the amount of memory needed to store the possible return
4784 registers of an untyped call. */
4786 arm_apply_result_size (void)
4788 int size = 16;
4790 if (TARGET_32BIT)
4792 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4793 size += 32;
4794 if (TARGET_IWMMXT_ABI)
4795 size += 8;
4798 return size;
4801 /* Decide whether TYPE should be returned in memory (true)
4802 or in a register (false). FNTYPE is the type of the function making
4803 the call. */
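 /* Illustrative examples under AAPCS (not an exhaustive statement of the
    rules): a struct of two ints (8 bytes) is larger than a word and has no
    co-processor candidate, so it is returned in memory; a struct holding a
    single double is a homogeneous floating-point aggregate and, under the
    VFP (hard-float) variant, is returned in d0 instead.  */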
4804 static bool
4805 arm_return_in_memory (const_tree type, const_tree fntype)
4807 HOST_WIDE_INT size;
4809 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4811 if (TARGET_AAPCS_BASED)
4813 /* Simple, non-aggregate types (ie not including vectors and
4814 complex) are always returned in a register (or registers).
4815 We don't care about which register here, so we can short-cut
4816 some of the detail. */
4817 if (!AGGREGATE_TYPE_P (type)
4818 && TREE_CODE (type) != VECTOR_TYPE
4819 && TREE_CODE (type) != COMPLEX_TYPE)
4820 return false;
4822 /* Any return value that is no larger than one word can be
4823 returned in r0. */
4824 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4825 return false;
4827 /* Check any available co-processors to see if they accept the
4828 type as a register candidate (VFP, for example, can return
4829 some aggregates in consecutive registers). These aren't
4830 available if the call is variadic. */
4831 if (aapcs_select_return_coproc (type, fntype) >= 0)
4832 return false;
4834 /* Vector values should be returned using ARM registers, not
4835 memory (unless they're over 16 bytes, which will break since
4836 we only have four call-clobbered registers to play with). */
4837 if (TREE_CODE (type) == VECTOR_TYPE)
4838 return (size < 0 || size > (4 * UNITS_PER_WORD));
4840 /* The rest go in memory. */
4841 return true;
4844 if (TREE_CODE (type) == VECTOR_TYPE)
4845 return (size < 0 || size > (4 * UNITS_PER_WORD));
4847 if (!AGGREGATE_TYPE_P (type) &&
4848 (TREE_CODE (type) != VECTOR_TYPE))
4849 /* All simple types are returned in registers. */
4850 return false;
4852 if (arm_abi != ARM_ABI_APCS)
4854 /* ATPCS and later return aggregate types in memory only if they are
4855 larger than a word (or are variable size). */
4856 return (size < 0 || size > UNITS_PER_WORD);
4859 /* For the arm-wince targets we choose to be compatible with Microsoft's
4860 ARM and Thumb compilers, which always return aggregates in memory. */
4861 #ifndef ARM_WINCE
4862 /* All structures/unions bigger than one word are returned in memory.
4863 Also catch the case where int_size_in_bytes returns -1. In this case
4864 the aggregate is either huge or of variable size, and in either case
4865 we will want to return it via memory and not in a register. */
4866 if (size < 0 || size > UNITS_PER_WORD)
4867 return true;
4869 if (TREE_CODE (type) == RECORD_TYPE)
4871 tree field;
4873 /* For a struct the APCS says that we only return in a register
4874 if the type is 'integer like' and every addressable element
4875 has an offset of zero. For practical purposes this means
4876 that the structure can have at most one non bit-field element
4877 and that this element must be the first one in the structure. */
4879 /* Find the first field, ignoring non FIELD_DECL things which will
4880 have been created by C++. */
4881 for (field = TYPE_FIELDS (type);
4882 field && TREE_CODE (field) != FIELD_DECL;
4883 field = DECL_CHAIN (field))
4884 continue;
4886 if (field == NULL)
4887 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4889 /* Check that the first field is valid for returning in a register. */
4891 /* ... Floats are not allowed */
4892 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4893 return true;
4895 /* ... Aggregates that are not themselves valid for returning in
4896 a register are not allowed. */
4897 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4898 return true;
4900 /* Now check the remaining fields, if any. Only bitfields are allowed,
4901 since they are not addressable. */
4902 for (field = DECL_CHAIN (field);
4903 field;
4904 field = DECL_CHAIN (field))
4906 if (TREE_CODE (field) != FIELD_DECL)
4907 continue;
4909 if (!DECL_BIT_FIELD_TYPE (field))
4910 return true;
4913 return false;
4916 if (TREE_CODE (type) == UNION_TYPE)
4918 tree field;
4920 /* Unions can be returned in registers if every element is
4921 integral, or can be returned in an integer register. */
4922 for (field = TYPE_FIELDS (type);
4923 field;
4924 field = DECL_CHAIN (field))
4926 if (TREE_CODE (field) != FIELD_DECL)
4927 continue;
4929 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4930 return true;
4932 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4933 return true;
4936 return false;
4938 #endif /* not ARM_WINCE */
4940 /* Return all other types in memory. */
4941 return true;
4944 const struct pcs_attribute_arg
4946 const char *arg;
4947 enum arm_pcs value;
4948 } pcs_attribute_args[] =
4950 {"aapcs", ARM_PCS_AAPCS},
4951 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4952 #if 0
4953 /* We could recognize these, but changes would be needed elsewhere
4954 * to implement them. */
4955 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4956 {"atpcs", ARM_PCS_ATPCS},
4957 {"apcs", ARM_PCS_APCS},
4958 #endif
4959 {NULL, ARM_PCS_UNKNOWN}
4962 static enum arm_pcs
4963 arm_pcs_from_attribute (tree attr)
4965 const struct pcs_attribute_arg *ptr;
4966 const char *arg;
4968 /* Get the value of the argument. */
4969 if (TREE_VALUE (attr) == NULL_TREE
4970 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4971 return ARM_PCS_UNKNOWN;
4973 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4975 /* Check it against the list of known arguments. */
4976 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4977 if (streq (arg, ptr->arg))
4978 return ptr->value;
4980 /* An unrecognized PCS variant. */
4981 return ARM_PCS_UNKNOWN;
4984 /* Get the PCS variant to use for this call. TYPE is the function's type
4985 specification, DECL is the specific declaration. DECL may be null if
4986 the call could be indirect or if this is a library call. */
4987 static enum arm_pcs
4988 arm_get_pcs_model (const_tree type, const_tree decl)
4990 bool user_convention = false;
4991 enum arm_pcs user_pcs = arm_pcs_default;
4992 tree attr;
4994 gcc_assert (type);
4996 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4997 if (attr)
4999 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5000 user_convention = true;
5003 if (TARGET_AAPCS_BASED)
5005 /* Detect varargs functions. These always use the base rules
5006 (no argument is ever a candidate for a co-processor
5007 register). */
5008 bool base_rules = stdarg_p (type);
5010 if (user_convention)
5012 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5013 sorry ("non-AAPCS derived PCS variant");
5014 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5015 error ("variadic functions must use the base AAPCS variant");
5018 if (base_rules)
5019 return ARM_PCS_AAPCS;
5020 else if (user_convention)
5021 return user_pcs;
5022 else if (decl && flag_unit_at_a_time)
5024 /* Local functions never leak outside this compilation unit,
5025 so we are free to use whatever conventions are
5026 appropriate. */
5027 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5028 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5029 if (i && i->local)
5030 return ARM_PCS_AAPCS_LOCAL;
5033 else if (user_convention && user_pcs != arm_pcs_default)
5034 sorry ("PCS variant");
5036 /* For everything else we use the target's default. */
5037 return arm_pcs_default;
5041 static void
5042 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5043 const_tree fntype ATTRIBUTE_UNUSED,
5044 rtx libcall ATTRIBUTE_UNUSED,
5045 const_tree fndecl ATTRIBUTE_UNUSED)
5047 /* Record the unallocated VFP registers. */
5048 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5049 pcum->aapcs_vfp_reg_alloc = 0;
5052 /* Walk down the type tree of TYPE counting consecutive base elements.
5053 If *MODEP is VOIDmode, then set it to the first valid floating point
5054 type. If a non-floating point type is found, or if a floating point
5055 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5056 otherwise return the count in the sub-tree. */
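 /* Illustrative examples: for "struct { double x, y; }" this returns 2
    with *MODEP set to DFmode (a homogeneous aggregate of two doubles);
    for "struct { float f; double d; }" it returns -1 because the element
    modes differ; for "float[4]" it returns 4 with *MODEP set to SFmode.  */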
5057 static int
5058 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5060 enum machine_mode mode;
5061 HOST_WIDE_INT size;
5063 switch (TREE_CODE (type))
5065 case REAL_TYPE:
5066 mode = TYPE_MODE (type);
5067 if (mode != DFmode && mode != SFmode)
5068 return -1;
5070 if (*modep == VOIDmode)
5071 *modep = mode;
5073 if (*modep == mode)
5074 return 1;
5076 break;
5078 case COMPLEX_TYPE:
5079 mode = TYPE_MODE (TREE_TYPE (type));
5080 if (mode != DFmode && mode != SFmode)
5081 return -1;
5083 if (*modep == VOIDmode)
5084 *modep = mode;
5086 if (*modep == mode)
5087 return 2;
5089 break;
5091 case VECTOR_TYPE:
5092 /* Use V2SImode and V4SImode as representatives of all 64-bit
5093 and 128-bit vector types, whether or not those modes are
5094 supported with the present options. */
5095 size = int_size_in_bytes (type);
5096 switch (size)
5098 case 8:
5099 mode = V2SImode;
5100 break;
5101 case 16:
5102 mode = V4SImode;
5103 break;
5104 default:
5105 return -1;
5108 if (*modep == VOIDmode)
5109 *modep = mode;
5111 /* Vector modes are considered to be opaque: two vectors are
5112 equivalent for the purposes of being homogeneous aggregates
5113 if they are the same size. */
5114 if (*modep == mode)
5115 return 1;
5117 break;
5119 case ARRAY_TYPE:
5121 int count;
5122 tree index = TYPE_DOMAIN (type);
5124 /* Can't handle incomplete types. */
5125 if (!COMPLETE_TYPE_P (type))
5126 return -1;
5128 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5129 if (count == -1
5130 || !index
5131 || !TYPE_MAX_VALUE (index)
5132 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5133 || !TYPE_MIN_VALUE (index)
5134 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5135 || count < 0)
5136 return -1;
5138 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5139 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5141 /* There must be no padding. */
5142 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5143 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5144 != count * GET_MODE_BITSIZE (*modep)))
5145 return -1;
5147 return count;
5150 case RECORD_TYPE:
5152 int count = 0;
5153 int sub_count;
5154 tree field;
5156 /* Can't handle incomplete types. */
5157 if (!COMPLETE_TYPE_P (type))
5158 return -1;
5160 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5162 if (TREE_CODE (field) != FIELD_DECL)
5163 continue;
5165 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5166 if (sub_count < 0)
5167 return -1;
5168 count += sub_count;
5171 /* There must be no padding. */
5172 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5173 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5174 != count * GET_MODE_BITSIZE (*modep)))
5175 return -1;
5177 return count;
5180 case UNION_TYPE:
5181 case QUAL_UNION_TYPE:
5183 /* These aren't very interesting except in a degenerate case. */
5184 int count = 0;
5185 int sub_count;
5186 tree field;
5188 /* Can't handle incomplete types. */
5189 if (!COMPLETE_TYPE_P (type))
5190 return -1;
5192 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5194 if (TREE_CODE (field) != FIELD_DECL)
5195 continue;
5197 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5198 if (sub_count < 0)
5199 return -1;
5200 count = count > sub_count ? count : sub_count;
5203 /* There must be no padding. */
5204 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5205 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5206 != count * GET_MODE_BITSIZE (*modep)))
5207 return -1;
5209 return count;
5212 default:
5213 break;
5216 return -1;
5219 /* Return true if PCS_VARIANT should use VFP registers. */
5220 static bool
5221 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5223 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5225 static bool seen_thumb1_vfp = false;
5227 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5229 sorry ("Thumb-1 hard-float VFP ABI");
5230 /* sorry() is not immediately fatal, so only display this once. */
5231 seen_thumb1_vfp = true;
5234 return true;
5237 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5238 return false;
5240 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5241 (TARGET_VFP_DOUBLE || !is_double));
5244 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5245 suitable for passing or returning in VFP registers for the PCS
5246 variant selected. If it is, then *BASE_MODE is updated to contain
5247 a machine mode describing each element of the argument's type and
5248 *COUNT to hold the number of such elements. */
5249 static bool
5250 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5251 enum machine_mode mode, const_tree type,
5252 enum machine_mode *base_mode, int *count)
5254 enum machine_mode new_mode = VOIDmode;
5256 /* If we have the type information, prefer that to working things
5257 out from the mode. */
5258 if (type)
5260 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5262 if (ag_count > 0 && ag_count <= 4)
5263 *count = ag_count;
5264 else
5265 return false;
5267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5268 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5269 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5271 *count = 1;
5272 new_mode = mode;
5274 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5276 *count = 2;
5277 new_mode = (mode == DCmode ? DFmode : SFmode);
5279 else
5280 return false;
5283 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5284 return false;
5286 *base_mode = new_mode;
5287 return true;
5290 static bool
5291 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5292 enum machine_mode mode, const_tree type)
5294 int count ATTRIBUTE_UNUSED;
5295 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5297 if (!use_vfp_abi (pcs_variant, false))
5298 return false;
5299 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5300 &ag_mode, &count);
5303 static bool
5304 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5305 const_tree type)
5307 if (!use_vfp_abi (pcum->pcs_variant, false))
5308 return false;
5310 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5311 &pcum->aapcs_vfp_rmode,
5312 &pcum->aapcs_vfp_rcount);
5315 static bool
5316 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5317 const_tree type ATTRIBUTE_UNUSED)
5319 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5320 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5321 int regno;
5323 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5324 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5326 pcum->aapcs_vfp_reg_alloc = mask << regno;
5327 if (mode == BLKmode
5328 || (mode == TImode && ! TARGET_NEON)
5329 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5331 int i;
5332 int rcount = pcum->aapcs_vfp_rcount;
5333 int rshift = shift;
5334 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5335 rtx par;
5336 if (!TARGET_NEON)
5338 /* Avoid using unsupported vector modes. */
5339 if (rmode == V2SImode)
5340 rmode = DImode;
5341 else if (rmode == V4SImode)
5343 rmode = DImode;
5344 rcount *= 2;
5345 rshift /= 2;
5348 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5349 for (i = 0; i < rcount; i++)
5351 rtx tmp = gen_rtx_REG (rmode,
5352 FIRST_VFP_REGNUM + regno + i * rshift);
5353 tmp = gen_rtx_EXPR_LIST
5354 (VOIDmode, tmp,
5355 GEN_INT (i * GET_MODE_SIZE (rmode)));
5356 XVECEXP (par, 0, i) = tmp;
5359 pcum->aapcs_reg = par;
5361 else
5362 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5363 return true;
5365 return false;
5368 static rtx
5369 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5370 enum machine_mode mode,
5371 const_tree type ATTRIBUTE_UNUSED)
5373 if (!use_vfp_abi (pcs_variant, false))
5374 return NULL;
5376 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5378 int count;
5379 enum machine_mode ag_mode;
5380 int i;
5381 rtx par;
5382 int shift;
5384 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5385 &ag_mode, &count);
5387 if (!TARGET_NEON)
5389 if (ag_mode == V2SImode)
5390 ag_mode = DImode;
5391 else if (ag_mode == V4SImode)
5393 ag_mode = DImode;
5394 count *= 2;
5397 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5398 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5399 for (i = 0; i < count; i++)
5401 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5402 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5403 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5404 XVECEXP (par, 0, i) = tmp;
5407 return par;
5410 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5413 static void
5414 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5415 enum machine_mode mode ATTRIBUTE_UNUSED,
5416 const_tree type ATTRIBUTE_UNUSED)
5418 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5419 pcum->aapcs_vfp_reg_alloc = 0;
5420 return;
5423 #define AAPCS_CP(X) \
5425 aapcs_ ## X ## _cum_init, \
5426 aapcs_ ## X ## _is_call_candidate, \
5427 aapcs_ ## X ## _allocate, \
5428 aapcs_ ## X ## _is_return_candidate, \
5429 aapcs_ ## X ## _allocate_return_reg, \
5430 aapcs_ ## X ## _advance \
5433 /* Table of co-processors that can be used to pass arguments in
5434 registers. Ideally no argument should be a candidate for more than
5435 one co-processor table entry, but the table is processed in order
5436 and stops after the first match. If that entry then fails to put
5437 the argument into a co-processor register, the argument will go on
5438 the stack. */
5439 static struct
5441 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5442 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5444 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5445 BLKmode) is a candidate for this co-processor's registers; this
5446 function should ignore any position-dependent state in
5447 CUMULATIVE_ARGS and only use call-type dependent information. */
5448 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5450 /* Return true if the argument does get a co-processor register; it
5451 should set aapcs_reg to an RTX of the register allocated as is
5452 required for a return from FUNCTION_ARG. */
5453 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5455 /* Return true if a result of mode MODE (or type TYPE if MODE is
5456 BLKmode) can be returned in this co-processor's registers. */
5457 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5459 /* Allocate and return an RTX element to hold the return type of a
5460 call; this routine must not fail and will only be called if
5461 is_return_candidate returned true with the same parameters. */
5462 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5464 /* Finish processing this argument and prepare to start processing
5465 the next one. */
5466 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5467 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5469 AAPCS_CP(vfp)
5472 #undef AAPCS_CP
5474 static int
5475 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5476 const_tree type)
5478 int i;
5480 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5481 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5482 return i;
5484 return -1;
5487 static int
5488 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5490 /* We aren't passed a decl, so we can't check that a call is local.
5491 However, it isn't clear that that would be a win anyway, since it
5492 might limit some tail-calling opportunities. */
5493 enum arm_pcs pcs_variant;
5495 if (fntype)
5497 const_tree fndecl = NULL_TREE;
5499 if (TREE_CODE (fntype) == FUNCTION_DECL)
5501 fndecl = fntype;
5502 fntype = TREE_TYPE (fntype);
5505 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5507 else
5508 pcs_variant = arm_pcs_default;
5510 if (pcs_variant != ARM_PCS_AAPCS)
5512 int i;
5514 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5515 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5516 TYPE_MODE (type),
5517 type))
5518 return i;
5520 return -1;
5523 static rtx
5524 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5525 const_tree fntype)
5527 /* We aren't passed a decl, so we can't check that a call is local.
5528 However, it isn't clear that that would be a win anyway, since it
5529 might limit some tail-calling opportunities. */
5530 enum arm_pcs pcs_variant;
5531 int unsignedp ATTRIBUTE_UNUSED;
5533 if (fntype)
5535 const_tree fndecl = NULL_TREE;
5537 if (TREE_CODE (fntype) == FUNCTION_DECL)
5539 fndecl = fntype;
5540 fntype = TREE_TYPE (fntype);
5543 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5545 else
5546 pcs_variant = arm_pcs_default;
5548 /* Promote integer types. */
5549 if (type && INTEGRAL_TYPE_P (type))
5550 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5552 if (pcs_variant != ARM_PCS_AAPCS)
5554 int i;
5556 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5557 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5558 type))
5559 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5560 mode, type);
5563 /* Promotes small structs returned in a register to full-word size
5564 for big-endian AAPCS. */
5565 if (type && arm_return_in_msb (type))
5567 HOST_WIDE_INT size = int_size_in_bytes (type);
5568 if (size % UNITS_PER_WORD != 0)
5570 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5571 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5575 return gen_rtx_REG (mode, R0_REGNUM);
5578 static rtx
5579 aapcs_libcall_value (enum machine_mode mode)
5581 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5582 && GET_MODE_SIZE (mode) <= 4)
5583 mode = SImode;
5585 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5588 /* Lay out a function argument using the AAPCS rules. The rule
5589 numbers referred to here are those in the AAPCS. */
5590 static void
5591 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5592 const_tree type, bool named)
5594 int nregs, nregs2;
5595 int ncrn;
5597 /* We only need to do this once per argument. */
5598 if (pcum->aapcs_arg_processed)
5599 return;
5601 pcum->aapcs_arg_processed = true;
5603 /* Special case: if named is false then we are handling an incoming
5604 anonymous argument which is on the stack. */
5605 if (!named)
5606 return;
5608 /* Is this a potential co-processor register candidate? */
5609 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5611 int slot = aapcs_select_call_coproc (pcum, mode, type);
5612 pcum->aapcs_cprc_slot = slot;
5614 /* We don't have to apply any of the rules from part B of the
5615 preparation phase, these are handled elsewhere in the
5616 compiler. */
5618 if (slot >= 0)
5620 /* A Co-processor register candidate goes either in its own
5621 class of registers or on the stack. */
5622 if (!pcum->aapcs_cprc_failed[slot])
5624 /* C1.cp - Try to allocate the argument to co-processor
5625 registers. */
5626 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5627 return;
5629 /* C2.cp - Put the argument on the stack and note that we
5630 can't assign any more candidates in this slot. We also
5631 need to note that we have allocated stack space, so that
5632 we won't later try to split a non-cprc candidate between
5633 core registers and the stack. */
5634 pcum->aapcs_cprc_failed[slot] = true;
5635 pcum->can_split = false;
5638 /* We didn't get a register, so this argument goes on the
5639 stack. */
5640 gcc_assert (pcum->can_split == false);
5641 return;
5645 /* C3 - For double-word aligned arguments, round the NCRN up to the
5646 next even number. */
5647 ncrn = pcum->aapcs_ncrn;
5648 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5649 ncrn++;
5651 nregs = ARM_NUM_REGS2(mode, type);
5653 /* Sigh, this test should really assert that nregs > 0, but a GCC
5654 extension allows empty structs and then gives them zero size; it
5655 then allows such a structure to be passed by value. For some of
5656 the code below we have to pretend that such an argument has
5657 non-zero size so that we 'locate' it correctly either in
5658 registers or on the stack. */
5659 gcc_assert (nregs >= 0);
5661 nregs2 = nregs ? nregs : 1;
5663 /* C4 - Argument fits entirely in core registers. */
5664 if (ncrn + nregs2 <= NUM_ARG_REGS)
5666 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5667 pcum->aapcs_next_ncrn = ncrn + nregs;
5668 return;
5671 /* C5 - Some core registers left and there are no arguments already
5672 on the stack: split this argument between the remaining core
5673 registers and the stack. */
5674 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5676 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5677 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5678 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5679 return;
5682 /* C6 - NCRN is set to 4. */
5683 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5685 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5686 return;
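/* A worked example of rules C3-C6 above (an illustrative sketch only;
   it assumes the base AAPCS variant with core argument registers r0-r3
   and no co-processor candidates):

       void f (int a, double b, int c);

   'a' occupies r0.  'b' needs doubleword alignment, so rule C3 rounds
   the NCRN up from 1 to 2 and 'b' occupies r2+r3 (C4).  'c' then no
   longer fits in core registers, so C6 sets the NCRN to 4 and 'c' goes
   on the stack (C7/C8).  */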
5689 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5690 for a call to a function whose data type is FNTYPE.
5691 For a library call, FNTYPE is NULL. */
5692 void
5693 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5694 rtx libname,
5695 tree fndecl ATTRIBUTE_UNUSED)
5697 /* Long call handling. */
5698 if (fntype)
5699 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5700 else
5701 pcum->pcs_variant = arm_pcs_default;
5703 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5705 if (arm_libcall_uses_aapcs_base (libname))
5706 pcum->pcs_variant = ARM_PCS_AAPCS;
5708 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5709 pcum->aapcs_reg = NULL_RTX;
5710 pcum->aapcs_partial = 0;
5711 pcum->aapcs_arg_processed = false;
5712 pcum->aapcs_cprc_slot = -1;
5713 pcum->can_split = true;
5715 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5717 int i;
5719 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5721 pcum->aapcs_cprc_failed[i] = false;
5722 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5725 return;
5728 /* Legacy ABIs */
5730 /* On the ARM, the offset starts at 0. */
5731 pcum->nregs = 0;
5732 pcum->iwmmxt_nregs = 0;
5733 pcum->can_split = true;
5735 /* Varargs vectors are treated the same as long long.
5736 named_count avoids having to change the way arm handles 'named'. */
5737 pcum->named_count = 0;
5738 pcum->nargs = 0;
5740 if (TARGET_REALLY_IWMMXT && fntype)
5742 tree fn_arg;
5744 for (fn_arg = TYPE_ARG_TYPES (fntype);
5745 fn_arg;
5746 fn_arg = TREE_CHAIN (fn_arg))
5747 pcum->named_count += 1;
5749 if (! pcum->named_count)
5750 pcum->named_count = INT_MAX;
5754 /* Return true if we use LRA instead of reload pass. */
5755 static bool
5756 arm_lra_p (void)
5758 return arm_lra_flag;
5761 /* Return true if mode/type need doubleword alignment. */
5762 static bool
5763 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5765 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5766 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
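/* For instance (an illustrative sketch, assuming an AAPCS-based target
   where PARM_BOUNDARY is 32 bits): 'long long' and 'double' arguments
   have 64-bit alignment, as does a type declared with
   __attribute__ ((aligned (8))), so all of these need doubleword
   alignment, while a plain 'int' does not.  */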
5770 /* Determine where to put an argument to a function.
5771 Value is zero to push the argument on the stack,
5772 or a hard register in which to store the argument.
5774 MODE is the argument's machine mode.
5775 TYPE is the data type of the argument (as a tree).
5776 This is null for libcalls where that information may
5777 not be available.
5778 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5779 the preceding args and about the function being called.
5780 NAMED is nonzero if this argument is a named parameter
5781 (otherwise it is an extra parameter matching an ellipsis).
5783 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5784 other arguments are passed on the stack. If (NAMED == 0) (which happens
5785 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5786 defined), say it is passed on the stack (function_prologue will
5787 indeed make it pass on the stack if necessary). */
5789 static rtx
5790 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5791 const_tree type, bool named)
5793 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5794 int nregs;
5796 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5797 a call insn (op3 of a call_value insn). */
5798 if (mode == VOIDmode)
5799 return const0_rtx;
5801 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5803 aapcs_layout_arg (pcum, mode, type, named);
5804 return pcum->aapcs_reg;
5807 /* Varargs vectors are treated the same as long long.
5808 named_count avoids having to change the way arm handles 'named'. */
5809 if (TARGET_IWMMXT_ABI
5810 && arm_vector_mode_supported_p (mode)
5811 && pcum->named_count > pcum->nargs + 1)
5813 if (pcum->iwmmxt_nregs <= 9)
5814 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5815 else
5817 pcum->can_split = false;
5818 return NULL_RTX;
5822 /* Put doubleword aligned quantities in even register pairs. */
5823 if (pcum->nregs & 1
5824 && ARM_DOUBLEWORD_ALIGN
5825 && arm_needs_doubleword_align (mode, type))
5826 pcum->nregs++;
5828 /* Only allow splitting an arg between regs and memory if all preceding
5829 args were allocated to regs. For args passed by reference we only count
5830 the reference pointer. */
5831 if (pcum->can_split)
5832 nregs = 1;
5833 else
5834 nregs = ARM_NUM_REGS2 (mode, type);
5836 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5837 return NULL_RTX;
5839 return gen_rtx_REG (mode, pcum->nregs);
5842 static unsigned int
5843 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5845 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5846 ? DOUBLEWORD_ALIGNMENT
5847 : PARM_BOUNDARY);
5850 static int
5851 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5852 tree type, bool named)
5854 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5855 int nregs = pcum->nregs;
5857 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5859 aapcs_layout_arg (pcum, mode, type, named);
5860 return pcum->aapcs_partial;
5863 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5864 return 0;
5866 if (NUM_ARG_REGS > nregs
5867 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5868 && pcum->can_split)
5869 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5871 return 0;
5874 /* Update the data in PCUM to advance over an argument
5875 of mode MODE and data type TYPE.
5876 (TYPE is null for libcalls where that information may not be available.) */
5878 static void
5879 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5880 const_tree type, bool named)
5882 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5884 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5886 aapcs_layout_arg (pcum, mode, type, named);
5888 if (pcum->aapcs_cprc_slot >= 0)
5890 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5891 type);
5892 pcum->aapcs_cprc_slot = -1;
5895 /* Generic stuff. */
5896 pcum->aapcs_arg_processed = false;
5897 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5898 pcum->aapcs_reg = NULL_RTX;
5899 pcum->aapcs_partial = 0;
5901 else
5903 pcum->nargs += 1;
5904 if (arm_vector_mode_supported_p (mode)
5905 && pcum->named_count > pcum->nargs
5906 && TARGET_IWMMXT_ABI)
5907 pcum->iwmmxt_nregs += 1;
5908 else
5909 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5913 /* Variable sized types are passed by reference. This is a GCC
5914 extension to the ARM ABI. */
5916 static bool
5917 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5918 enum machine_mode mode ATTRIBUTE_UNUSED,
5919 const_tree type, bool named ATTRIBUTE_UNUSED)
5921 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5924 /* Encode the current state of the #pragma [no_]long_calls. */
5925 typedef enum
5927 OFF, /* No #pragma [no_]long_calls is in effect. */
5928 LONG, /* #pragma long_calls is in effect. */
5929 SHORT /* #pragma no_long_calls is in effect. */
5930 } arm_pragma_enum;
5932 static arm_pragma_enum arm_pragma_long_calls = OFF;
5934 void
5935 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5937 arm_pragma_long_calls = LONG;
5940 void
5941 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5943 arm_pragma_long_calls = SHORT;
5946 void
5947 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5949 arm_pragma_long_calls = OFF;
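/* Example of the pragmas handled above (illustrative; the behaviour
   described follows arm_set_default_type_attributes below):

       #pragma long_calls
       void far_func (void);      gets an implicit long_call attribute
       #pragma no_long_calls
       void near_func (void);     gets an implicit short_call attribute
       #pragma long_calls_off
       void other_func (void);    back to the command-line default     */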
5952 /* Handle an attribute requiring a FUNCTION_DECL;
5953 arguments as in struct attribute_spec.handler. */
5954 static tree
5955 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5956 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5958 if (TREE_CODE (*node) != FUNCTION_DECL)
5960 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5961 name);
5962 *no_add_attrs = true;
5965 return NULL_TREE;
5968 /* Handle an "interrupt" or "isr" attribute;
5969 arguments as in struct attribute_spec.handler. */
5970 static tree
5971 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5972 bool *no_add_attrs)
5974 if (DECL_P (*node))
5976 if (TREE_CODE (*node) != FUNCTION_DECL)
5978 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5979 name);
5980 *no_add_attrs = true;
5982 /* FIXME: the argument, if any, is checked for type attributes;
5983 should it be checked for decl ones? */
5985 else
5987 if (TREE_CODE (*node) == FUNCTION_TYPE
5988 || TREE_CODE (*node) == METHOD_TYPE)
5990 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5992 warning (OPT_Wattributes, "%qE attribute ignored",
5993 name);
5994 *no_add_attrs = true;
5997 else if (TREE_CODE (*node) == POINTER_TYPE
5998 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5999 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6000 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6002 *node = build_variant_type_copy (*node);
6003 TREE_TYPE (*node) = build_type_attribute_variant
6004 (TREE_TYPE (*node),
6005 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6006 *no_add_attrs = true;
6008 else
6010 /* Possibly pass this attribute on from the type to a decl. */
6011 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6012 | (int) ATTR_FLAG_FUNCTION_NEXT
6013 | (int) ATTR_FLAG_ARRAY_NEXT))
6015 *no_add_attrs = true;
6016 return tree_cons (name, args, NULL_TREE);
6018 else
6020 warning (OPT_Wattributes, "%qE attribute ignored",
6021 name);
6026 return NULL_TREE;
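/* Example use of the attribute handled above (illustrative; the
   accepted argument strings are those recognized by arm_isr_value,
   e.g. "IRQ" and "FIQ"):

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
       void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An unrecognized argument makes arm_isr_value return ARM_FT_UNKNOWN,
   so the attribute is ignored with a warning.  */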
6029 /* Handle a "pcs" attribute; arguments as in struct
6030 attribute_spec.handler. */
6031 static tree
6032 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6035 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6037 warning (OPT_Wattributes, "%qE attribute ignored", name);
6038 *no_add_attrs = true;
6040 return NULL_TREE;
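/* Example use of the "pcs" attribute handled above (illustrative; the
   strings accepted by arm_pcs_from_attribute include "aapcs" and
   "aapcs-vfp"):

       double f (double) __attribute__ ((pcs ("aapcs")));
       double g (double) __attribute__ ((pcs ("aapcs-vfp")));

   Any other string yields ARM_PCS_UNKNOWN and the attribute is ignored
   with a warning.  */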
6043 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6044 /* Handle the "notshared" attribute. This attribute is another way of
6045 requesting hidden visibility. ARM's compiler supports
6046 "__declspec(notshared)"; we support the same thing via an
6047 attribute. */
6049 static tree
6050 arm_handle_notshared_attribute (tree *node,
6051 tree name ATTRIBUTE_UNUSED,
6052 tree args ATTRIBUTE_UNUSED,
6053 int flags ATTRIBUTE_UNUSED,
6054 bool *no_add_attrs)
6056 tree decl = TYPE_NAME (*node);
6058 if (decl)
6060 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6061 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6062 *no_add_attrs = false;
6064 return NULL_TREE;
6066 #endif
6068 /* Return 0 if the attributes for two types are incompatible, 1 if they
6069 are compatible, and 2 if they are nearly compatible (which causes a
6070 warning to be generated). */
6071 static int
6072 arm_comp_type_attributes (const_tree type1, const_tree type2)
6074 int l1, l2, s1, s2;
6076 /* Check for mismatch of non-default calling convention. */
6077 if (TREE_CODE (type1) != FUNCTION_TYPE)
6078 return 1;
6080 /* Check for mismatched call attributes. */
6081 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6082 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6083 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6084 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6086 /* Only bother to check if an attribute is defined. */
6087 if (l1 | l2 | s1 | s2)
6089 /* If one type has an attribute, the other must have the same attribute. */
6090 if ((l1 != l2) || (s1 != s2))
6091 return 0;
6093 /* Disallow mixed attributes. */
6094 if ((l1 & s2) || (l2 & s1))
6095 return 0;
6098 /* Check for mismatched ISR attribute. */
6099 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6100 if (! l1)
6101 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6102 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6103 if (! l2)
6104 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6105 if (l1 != l2)
6106 return 0;
6108 return 1;
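/* As an illustration of the checks above (a sketch only): a function
   type carrying long_call and one carrying short_call compare as
   incompatible (return 0), for example when a pointer to a short_call
   function type is assigned the address of

       extern void f (void) __attribute__ ((long_call));

   whereas two function types with no call or ISR attributes at all
   compare as compatible (return 1).  */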
6111 /* Assigns default attributes to newly defined type. This is used to
6112 set short_call/long_call attributes for function types of
6113 functions defined inside corresponding #pragma scopes. */
6114 static void
6115 arm_set_default_type_attributes (tree type)
6117 /* Add __attribute__ ((long_call)) to all functions, when
6118 inside #pragma long_calls or __attribute__ ((short_call)),
6119 when inside #pragma no_long_calls. */
6120 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6122 tree type_attr_list, attr_name;
6123 type_attr_list = TYPE_ATTRIBUTES (type);
6125 if (arm_pragma_long_calls == LONG)
6126 attr_name = get_identifier ("long_call");
6127 else if (arm_pragma_long_calls == SHORT)
6128 attr_name = get_identifier ("short_call");
6129 else
6130 return;
6132 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6133 TYPE_ATTRIBUTES (type) = type_attr_list;
6137 /* Return true if DECL is known to be linked into section SECTION. */
6139 static bool
6140 arm_function_in_section_p (tree decl, section *section)
6142 /* We can only be certain about functions defined in the same
6143 compilation unit. */
6144 if (!TREE_STATIC (decl))
6145 return false;
6147 /* Make sure that SYMBOL always binds to the definition in this
6148 compilation unit. */
6149 if (!targetm.binds_local_p (decl))
6150 return false;
6152 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6153 if (!DECL_SECTION_NAME (decl))
6155 /* Make sure that we will not create a unique section for DECL. */
6156 if (flag_function_sections || DECL_ONE_ONLY (decl))
6157 return false;
6160 return function_section (decl) == section;
6163 /* Return nonzero if a 32-bit "long_call" should be generated for
6164 a call from the current function to DECL. We generate a long_call
6165 if the function:
6167 a. has an __attribute__ ((long_call))
6168 or b. is within the scope of a #pragma long_calls
6169 or c. the -mlong-calls command line switch has been specified
6171 However we do not generate a long call if the function:
6173 d. has an __attribute__ ((short_call))
6174 or e. is inside the scope of a #pragma no_long_calls
6175 or f. is defined in the same section as the current function. */
6177 bool
6178 arm_is_long_call_p (tree decl)
6180 tree attrs;
6182 if (!decl)
6183 return TARGET_LONG_CALLS;
6185 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6186 if (lookup_attribute ("short_call", attrs))
6187 return false;
6189 /* For "f", be conservative, and only cater for cases in which the
6190 whole of the current function is placed in the same section. */
6191 if (!flag_reorder_blocks_and_partition
6192 && TREE_CODE (decl) == FUNCTION_DECL
6193 && arm_function_in_section_p (decl, current_function_section ()))
6194 return false;
6196 if (lookup_attribute ("long_call", attrs))
6197 return true;
6199 return TARGET_LONG_CALLS;
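/* Putting the rules above together (illustrative):

       extern void far_fn (void) __attribute__ ((long_call));    always a long call
       extern void near_fn (void) __attribute__ ((short_call));  never a long call
       extern void plain_fn (void);   a long call only with -mlong-calls or inside
                                      #pragma long_calls, and not when it is known
                                      to be in the same section as the caller      */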
6202 /* Return nonzero if it is ok to make a tail-call to DECL. */
6203 static bool
6204 arm_function_ok_for_sibcall (tree decl, tree exp)
6206 unsigned long func_type;
6208 if (cfun->machine->sibcall_blocked)
6209 return false;
6211 /* Never tailcall something if we are generating code for Thumb-1. */
6212 if (TARGET_THUMB1)
6213 return false;
6215 /* The PIC register is live on entry to VxWorks PLT entries, so we
6216 must make the call before restoring the PIC register. */
6217 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6218 return false;
6220 /* Cannot tail-call to long calls, since these are out of range of
6221 a branch instruction. */
6222 if (decl && arm_is_long_call_p (decl))
6223 return false;
6225 /* If we are interworking and the function is not declared static
6226 then we can't tail-call it unless we know that it exists in this
6227 compilation unit (since it might be a Thumb routine). */
6228 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6229 && !TREE_ASM_WRITTEN (decl))
6230 return false;
6232 func_type = arm_current_func_type ();
6233 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6234 if (IS_INTERRUPT (func_type))
6235 return false;
6237 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6239 /* Check that the return value locations are the same. For
6240 example that we aren't returning a value from the sibling in
6241 a VFP register but then need to transfer it to a core
6242 register. */
6243 rtx a, b;
6245 a = arm_function_value (TREE_TYPE (exp), decl, false);
6246 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6247 cfun->decl, false);
6248 if (!rtx_equal_p (a, b))
6249 return false;
6252 /* Never tailcall if function may be called with a misaligned SP. */
6253 if (IS_STACKALIGN (func_type))
6254 return false;
6256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6257 references should become a NOP. Don't convert such calls into
6258 sibling calls. */
6259 if (TARGET_AAPCS_BASED
6260 && arm_abi == ARM_ABI_AAPCS
6261 && decl
6262 && DECL_WEAK (decl))
6263 return false;
6265 /* Everything else is ok. */
6266 return true;
6270 /* Addressing mode support functions. */
6272 /* Return nonzero if X is a legitimate immediate operand when compiling
6273 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6275 legitimate_pic_operand_p (rtx x)
6277 if (GET_CODE (x) == SYMBOL_REF
6278 || (GET_CODE (x) == CONST
6279 && GET_CODE (XEXP (x, 0)) == PLUS
6280 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6281 return 0;
6283 return 1;
6286 /* Record that the current function needs a PIC register. Initialize
6287 cfun->machine->pic_reg if we have not already done so. */
6289 static void
6290 require_pic_register (void)
6292 /* A lot of the logic here is made obscure by the fact that this
6293 routine gets called as part of the rtx cost estimation process.
6294 We don't want those calls to affect any assumptions about the real
6295 function; and further, we can't call entry_of_function() until we
6296 start the real expansion process. */
6297 if (!crtl->uses_pic_offset_table)
6299 gcc_assert (can_create_pseudo_p ());
6300 if (arm_pic_register != INVALID_REGNUM
6301 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6303 if (!cfun->machine->pic_reg)
6304 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6306 /* Play games to avoid marking the function as needing pic
6307 if we are being called as part of the cost-estimation
6308 process. */
6309 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6310 crtl->uses_pic_offset_table = 1;
6312 else
6314 rtx seq, insn;
6316 if (!cfun->machine->pic_reg)
6317 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6319 /* Play games to avoid marking the function as needing pic
6320 if we are being called as part of the cost-estimation
6321 process. */
6322 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6324 crtl->uses_pic_offset_table = 1;
6325 start_sequence ();
6327 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6328 && arm_pic_register > LAST_LO_REGNUM)
6329 emit_move_insn (cfun->machine->pic_reg,
6330 gen_rtx_REG (Pmode, arm_pic_register));
6331 else
6332 arm_load_pic_register (0UL);
6334 seq = get_insns ();
6335 end_sequence ();
6337 for (insn = seq; insn; insn = NEXT_INSN (insn))
6338 if (INSN_P (insn))
6339 INSN_LOCATION (insn) = prologue_location;
6341 /* We can be called during expansion of PHI nodes, where
6342 we can't yet emit instructions directly in the final
6343 insn stream. Queue the insns on the entry edge, they will
6344 be committed after everything else is expanded. */
6345 insert_insn_on_edge (seq,
6346 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6353 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6355 if (GET_CODE (orig) == SYMBOL_REF
6356 || GET_CODE (orig) == LABEL_REF)
6358 rtx insn;
6360 if (reg == 0)
6362 gcc_assert (can_create_pseudo_p ());
6363 reg = gen_reg_rtx (Pmode);
6366 /* VxWorks does not impose a fixed gap between segments; the run-time
6367 gap can be different from the object-file gap. We therefore can't
6368 use GOTOFF unless we are absolutely sure that the symbol is in the
6369 same segment as the GOT. Unfortunately, the flexibility of linker
6370 scripts means that we can't be sure of that in general, so assume
6371 that GOTOFF is never valid on VxWorks. */
6372 if ((GET_CODE (orig) == LABEL_REF
6373 || (GET_CODE (orig) == SYMBOL_REF &&
6374 SYMBOL_REF_LOCAL_P (orig)))
6375 && NEED_GOT_RELOC
6376 && arm_pic_data_is_text_relative)
6377 insn = arm_pic_static_addr (orig, reg);
6378 else
6380 rtx pat;
6381 rtx mem;
6383 /* If this function doesn't have a pic register, create one now. */
6384 require_pic_register ();
6386 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6388 /* Make the MEM as close to a constant as possible. */
6389 mem = SET_SRC (pat);
6390 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6391 MEM_READONLY_P (mem) = 1;
6392 MEM_NOTRAP_P (mem) = 1;
6394 insn = emit_insn (pat);
6397 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6398 by loop. */
6399 set_unique_reg_note (insn, REG_EQUAL, orig);
6401 return reg;
6403 else if (GET_CODE (orig) == CONST)
6405 rtx base, offset;
6407 if (GET_CODE (XEXP (orig, 0)) == PLUS
6408 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6409 return orig;
6411 /* Handle the case where we have: const (UNSPEC_TLS). */
6412 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6413 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6414 return orig;
6416 /* Handle the case where we have:
6417 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6418 CONST_INT. */
6419 if (GET_CODE (XEXP (orig, 0)) == PLUS
6420 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6421 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6423 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6424 return orig;
6427 if (reg == 0)
6429 gcc_assert (can_create_pseudo_p ());
6430 reg = gen_reg_rtx (Pmode);
6433 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6435 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6436 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6437 base == reg ? 0 : reg);
6439 if (CONST_INT_P (offset))
6441 /* The base register doesn't really matter, we only want to
6442 test the index for the appropriate mode. */
6443 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6445 gcc_assert (can_create_pseudo_p ());
6446 offset = force_reg (Pmode, offset);
6449 if (CONST_INT_P (offset))
6450 return plus_constant (Pmode, base, INTVAL (offset));
6453 if (GET_MODE_SIZE (mode) > 4
6454 && (GET_MODE_CLASS (mode) == MODE_INT
6455 || TARGET_SOFT_FLOAT))
6457 emit_insn (gen_addsi3 (reg, base, offset));
6458 return reg;
6461 return gen_rtx_PLUS (Pmode, base, offset);
6464 return orig;
6468 /* Find a spare register to use during the prolog of a function. */
6470 static int
6471 thumb_find_work_register (unsigned long pushed_regs_mask)
6473 int reg;
6475 /* Check the argument registers first as these are call-used. The
6476 register allocation order means that sometimes r3 might be used
6477 but earlier argument registers might not, so check them all. */
6478 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6479 if (!df_regs_ever_live_p (reg))
6480 return reg;
6482 /* Before going on to check the call-saved registers we can try a couple
6483 more ways of deducing that r3 is available. The first is when we are
6484 pushing anonymous arguments onto the stack and we have less than 4
6485 registers worth of fixed arguments(*). In this case r3 will be part of
6486 the variable argument list and so we can be sure that it will be
6487 pushed right at the start of the function. Hence it will be available
6488 for the rest of the prologue.
6489 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6490 if (cfun->machine->uses_anonymous_args
6491 && crtl->args.pretend_args_size > 0)
6492 return LAST_ARG_REGNUM;
6494 /* The other case is when we have fixed arguments but less than 4 registers
6495 worth. In this case r3 might be used in the body of the function, but
6496 it is not being used to convey an argument into the function. In theory
6497 we could just check crtl->args.size to see how many bytes are
6498 being passed in argument registers, but it seems that it is unreliable.
6499 Sometimes it will have the value 0 when in fact arguments are being
6500 passed. (See testcase execute/20021111-1.c for an example). So we also
6501 check the args_info.nregs field as well. The problem with this field is
6502 that it makes no allowances for arguments that are passed to the
6503 function but which are not used. Hence we could miss an opportunity
6504 when a function has an unused argument in r3. But it is better to be
6505 safe than to be sorry. */
6506 if (! cfun->machine->uses_anonymous_args
6507 && crtl->args.size >= 0
6508 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6509 && (TARGET_AAPCS_BASED
6510 ? crtl->args.info.aapcs_ncrn < 4
6511 : crtl->args.info.nregs < 4))
6512 return LAST_ARG_REGNUM;
6514 /* Otherwise look for a call-saved register that is going to be pushed. */
6515 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6516 if (pushed_regs_mask & (1 << reg))
6517 return reg;
6519 if (TARGET_THUMB2)
6521 /* Thumb-2 can use high regs. */
6522 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6523 if (pushed_regs_mask & (1 << reg))
6524 return reg;
6526 /* Something went wrong - thumb_compute_save_reg_mask()
6527 should have arranged for a suitable register to be pushed. */
6528 gcc_unreachable ();
6531 static GTY(()) int pic_labelno;
6533 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6534 low register. */
6536 void
6537 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6539 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6541 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6542 return;
6544 gcc_assert (flag_pic);
6546 pic_reg = cfun->machine->pic_reg;
6547 if (TARGET_VXWORKS_RTP)
6549 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6550 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6551 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6553 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6555 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6556 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6558 else
6560 /* We use an UNSPEC rather than a LABEL_REF because this label
6561 never appears in the code stream. */
6563 labelno = GEN_INT (pic_labelno++);
6564 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6565 l1 = gen_rtx_CONST (VOIDmode, l1);
6567 /* On the ARM the PC register contains 'dot + 8' at the time of the
6568 addition, on the Thumb it is 'dot + 4'. */
6569 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6570 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6571 UNSPEC_GOTSYM_OFF);
6572 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6574 if (TARGET_32BIT)
6576 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6578 else /* TARGET_THUMB1 */
6580 if (arm_pic_register != INVALID_REGNUM
6581 && REGNO (pic_reg) > LAST_LO_REGNUM)
6583 /* We will have pushed the pic register, so we should always be
6584 able to find a work register. */
6585 pic_tmp = gen_rtx_REG (SImode,
6586 thumb_find_work_register (saved_regs));
6587 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6588 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6589 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6591 else if (arm_pic_register != INVALID_REGNUM
6592 && arm_pic_register > LAST_LO_REGNUM
6593 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6595 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6596 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6597 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6599 else
6600 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6604 /* Need to emit this whether or not we obey regdecls,
6605 since setjmp/longjmp can cause life info to screw up. */
6606 emit_use (pic_reg);
6609 /* Generate code to load the address of a static var when flag_pic is set. */
6610 static rtx
6611 arm_pic_static_addr (rtx orig, rtx reg)
6613 rtx l1, labelno, offset_rtx, insn;
6615 gcc_assert (flag_pic);
6617 /* We use an UNSPEC rather than a LABEL_REF because this label
6618 never appears in the code stream. */
6619 labelno = GEN_INT (pic_labelno++);
6620 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6621 l1 = gen_rtx_CONST (VOIDmode, l1);
6623 /* On the ARM the PC register contains 'dot + 8' at the time of the
6624 addition, on the Thumb it is 'dot + 4'. */
6625 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6626 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6627 UNSPEC_SYMBOL_OFFSET);
6628 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6630 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6631 return insn;
6634 /* Return nonzero if X is valid as an ARM state addressing register. */
6635 static int
6636 arm_address_register_rtx_p (rtx x, int strict_p)
6638 int regno;
6640 if (!REG_P (x))
6641 return 0;
6643 regno = REGNO (x);
6645 if (strict_p)
6646 return ARM_REGNO_OK_FOR_BASE_P (regno);
6648 return (regno <= LAST_ARM_REGNUM
6649 || regno >= FIRST_PSEUDO_REGISTER
6650 || regno == FRAME_POINTER_REGNUM
6651 || regno == ARG_POINTER_REGNUM);
6654 /* Return TRUE if this rtx is the difference of a symbol and a label,
6655 and will reduce to a PC-relative relocation in the object file.
6656 Expressions like this can be left alone when generating PIC, rather
6657 than forced through the GOT. */
6658 static int
6659 pcrel_constant_p (rtx x)
6661 if (GET_CODE (x) == MINUS)
6662 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6664 return FALSE;
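/* For example (illustrative RTL), pcrel_constant_p is true for

       (minus (symbol_ref ("sym")) (label_ref L))

   which the assembler can resolve to a PC-relative value, so such a
   constant does not need to be forced through the GOT when generating
   PIC code.  */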
6667 /* Return true if X will surely end up in an index register after the next
6668 splitting pass. */
6669 static bool
6670 will_be_in_index_register (const_rtx x)
6672 /* arm.md: calculate_pic_address will split this into a register. */
6673 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6676 /* Return nonzero if X is a valid ARM state address operand. */
6678 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6679 int strict_p)
6681 bool use_ldrd;
6682 enum rtx_code code = GET_CODE (x);
6684 if (arm_address_register_rtx_p (x, strict_p))
6685 return 1;
6687 use_ldrd = (TARGET_LDRD
6688 && (mode == DImode
6689 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6691 if (code == POST_INC || code == PRE_DEC
6692 || ((code == PRE_INC || code == POST_DEC)
6693 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6694 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6696 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6697 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6698 && GET_CODE (XEXP (x, 1)) == PLUS
6699 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6701 rtx addend = XEXP (XEXP (x, 1), 1);
6703 /* Don't allow ldrd post increment by register because it's hard
6704 to fixup invalid register choices. */
6705 if (use_ldrd
6706 && GET_CODE (x) == POST_MODIFY
6707 && REG_P (addend))
6708 return 0;
6710 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6711 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6714 /* After reload constants split into minipools will have addresses
6715 from a LABEL_REF. */
6716 else if (reload_completed
6717 && (code == LABEL_REF
6718 || (code == CONST
6719 && GET_CODE (XEXP (x, 0)) == PLUS
6720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6721 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6722 return 1;
6724 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6725 return 0;
6727 else if (code == PLUS)
6729 rtx xop0 = XEXP (x, 0);
6730 rtx xop1 = XEXP (x, 1);
6732 return ((arm_address_register_rtx_p (xop0, strict_p)
6733 && ((CONST_INT_P (xop1)
6734 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6735 || (!strict_p && will_be_in_index_register (xop1))))
6736 || (arm_address_register_rtx_p (xop1, strict_p)
6737 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6740 #if 0
6741 /* Reload currently can't handle MINUS, so disable this for now */
6742 else if (GET_CODE (x) == MINUS)
6744 rtx xop0 = XEXP (x, 0);
6745 rtx xop1 = XEXP (x, 1);
6747 return (arm_address_register_rtx_p (xop0, strict_p)
6748 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6750 #endif
6752 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6753 && code == SYMBOL_REF
6754 && CONSTANT_POOL_ADDRESS_P (x)
6755 && ! (flag_pic
6756 && symbol_mentioned_p (get_pool_constant (x))
6757 && ! pcrel_constant_p (get_pool_constant (x))))
6758 return 1;
6760 return 0;
6763 /* Return nonzero if X is a valid Thumb-2 address operand. */
6764 static int
6765 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6767 bool use_ldrd;
6768 enum rtx_code code = GET_CODE (x);
6770 if (arm_address_register_rtx_p (x, strict_p))
6771 return 1;
6773 use_ldrd = (TARGET_LDRD
6774 && (mode == DImode
6775 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6777 if (code == POST_INC || code == PRE_DEC
6778 || ((code == PRE_INC || code == POST_DEC)
6779 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6780 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6782 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6783 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6784 && GET_CODE (XEXP (x, 1)) == PLUS
6785 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6787 /* Thumb-2 only has autoincrement by constant. */
6788 rtx addend = XEXP (XEXP (x, 1), 1);
6789 HOST_WIDE_INT offset;
6791 if (!CONST_INT_P (addend))
6792 return 0;
6794 offset = INTVAL(addend);
6795 if (GET_MODE_SIZE (mode) <= 4)
6796 return (offset > -256 && offset < 256);
6798 return (use_ldrd && offset > -1024 && offset < 1024
6799 && (offset & 3) == 0);
6802 /* After reload constants split into minipools will have addresses
6803 from a LABEL_REF. */
6804 else if (reload_completed
6805 && (code == LABEL_REF
6806 || (code == CONST
6807 && GET_CODE (XEXP (x, 0)) == PLUS
6808 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6809 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6810 return 1;
6812 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6813 return 0;
6815 else if (code == PLUS)
6817 rtx xop0 = XEXP (x, 0);
6818 rtx xop1 = XEXP (x, 1);
6820 return ((arm_address_register_rtx_p (xop0, strict_p)
6821 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6822 || (!strict_p && will_be_in_index_register (xop1))))
6823 || (arm_address_register_rtx_p (xop1, strict_p)
6824 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6827 /* Normally we can assign constant values to target registers without
6828 the help of the constant pool. But there are cases where we have to use
6829 the constant pool, such as:
6830 1) assigning a label to a register.
6831 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6833 A constant pool access of the form:
6834 (set (reg r0) (mem (symbol_ref (".LC0"))))
6835 would cause a literal pool to be used (later, in arm_reorg).
6836 So here we mark such a form as invalid; the compiler will then
6837 adjust it into:
6838 (set (reg r0) (symbol_ref (".LC0")))
6839 (set (reg r0) (mem (reg r0))).
6840 No extra register is required, and (mem (reg r0)) won't cause the use
6841 of literal pools. */
6842 else if (arm_disable_literal_pool && code == SYMBOL_REF
6843 && CONSTANT_POOL_ADDRESS_P (x))
6844 return 0;
6846 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6847 && code == SYMBOL_REF
6848 && CONSTANT_POOL_ADDRESS_P (x)
6849 && ! (flag_pic
6850 && symbol_mentioned_p (get_pool_constant (x))
6851 && ! pcrel_constant_p (get_pool_constant (x))))
6852 return 1;
6854 return 0;
6857 /* Return nonzero if INDEX is valid for an address index operand in
6858 ARM state. */
6859 static int
6860 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6861 int strict_p)
6863 HOST_WIDE_INT range;
6864 enum rtx_code code = GET_CODE (index);
6866 /* Standard coprocessor addressing modes. */
6867 if (TARGET_HARD_FLOAT
6868 && TARGET_VFP
6869 && (mode == SFmode || mode == DFmode))
6870 return (code == CONST_INT && INTVAL (index) < 1024
6871 && INTVAL (index) > -1024
6872 && (INTVAL (index) & 3) == 0);
6874 /* For quad modes, we restrict the constant offset to be slightly less
6875 than what the instruction format permits. We do this because for
6876 quad mode moves, we will actually decompose them into two separate
6877 double-mode reads or writes. INDEX must therefore be a valid
6878 (double-mode) offset and so should INDEX+8. */
6879 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6880 return (code == CONST_INT
6881 && INTVAL (index) < 1016
6882 && INTVAL (index) > -1024
6883 && (INTVAL (index) & 3) == 0);
6885 /* We have no such constraint on double mode offsets, so we permit the
6886 full range of the instruction format. */
6887 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6888 return (code == CONST_INT
6889 && INTVAL (index) < 1024
6890 && INTVAL (index) > -1024
6891 && (INTVAL (index) & 3) == 0);
6893 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6894 return (code == CONST_INT
6895 && INTVAL (index) < 1024
6896 && INTVAL (index) > -1024
6897 && (INTVAL (index) & 3) == 0);
6899 if (arm_address_register_rtx_p (index, strict_p)
6900 && (GET_MODE_SIZE (mode) <= 4))
6901 return 1;
6903 if (mode == DImode || mode == DFmode)
6905 if (code == CONST_INT)
6907 HOST_WIDE_INT val = INTVAL (index);
6909 if (TARGET_LDRD)
6910 return val > -256 && val < 256;
6911 else
6912 return val > -4096 && val < 4092;
6915 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6918 if (GET_MODE_SIZE (mode) <= 4
6919 && ! (arm_arch4
6920 && (mode == HImode
6921 || mode == HFmode
6922 || (mode == QImode && outer == SIGN_EXTEND))))
6924 if (code == MULT)
6926 rtx xiop0 = XEXP (index, 0);
6927 rtx xiop1 = XEXP (index, 1);
6929 return ((arm_address_register_rtx_p (xiop0, strict_p)
6930 && power_of_two_operand (xiop1, SImode))
6931 || (arm_address_register_rtx_p (xiop1, strict_p)
6932 && power_of_two_operand (xiop0, SImode)));
6934 else if (code == LSHIFTRT || code == ASHIFTRT
6935 || code == ASHIFT || code == ROTATERT)
6937 rtx op = XEXP (index, 1);
6939 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6940 && CONST_INT_P (op)
6941 && INTVAL (op) > 0
6942 && INTVAL (op) <= 31);
6946 /* For ARM v4 we may be doing a sign-extend operation during the
6947 load. */
6948 if (arm_arch4)
6950 if (mode == HImode
6951 || mode == HFmode
6952 || (outer == SIGN_EXTEND && mode == QImode))
6953 range = 256;
6954 else
6955 range = 4096;
6957 else
6958 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6960 return (code == CONST_INT
6961 && INTVAL (index) < range
6962 && INTVAL (index) > -range);
6965 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6966 index operand. i.e. 1, 2, 4 or 8. */
6967 static bool
6968 thumb2_index_mul_operand (rtx op)
6970 HOST_WIDE_INT val;
6972 if (!CONST_INT_P (op))
6973 return false;
6975 val = INTVAL(op);
6976 return (val == 1 || val == 2 || val == 4 || val == 8);
6979 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6980 static int
6981 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6983 enum rtx_code code = GET_CODE (index);
6985 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6986 /* Standard coprocessor addressing modes. */
6987 if (TARGET_HARD_FLOAT
6988 && TARGET_VFP
6989 && (mode == SFmode || mode == DFmode))
6990 return (code == CONST_INT && INTVAL (index) < 1024
6991 /* Thumb-2 allows only a > -256 index range for its core register
6992 load/stores. Since we allow SF/DF in core registers, we have
6993 to use the intersection between -256~4096 (core) and -1024~1024
6994 (coprocessor). */
6995 && INTVAL (index) > -256
6996 && (INTVAL (index) & 3) == 0);
6998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7000 /* For DImode assume values will usually live in core regs
7001 and only allow LDRD addressing modes. */
7002 if (!TARGET_LDRD || mode != DImode)
7003 return (code == CONST_INT
7004 && INTVAL (index) < 1024
7005 && INTVAL (index) > -1024
7006 && (INTVAL (index) & 3) == 0);
7009 /* For quad modes, we restrict the constant offset to be slightly less
7010 than what the instruction format permits. We do this because for
7011 quad mode moves, we will actually decompose them into two separate
7012 double-mode reads or writes. INDEX must therefore be a valid
7013 (double-mode) offset and so should INDEX+8. */
7014 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7015 return (code == CONST_INT
7016 && INTVAL (index) < 1016
7017 && INTVAL (index) > -1024
7018 && (INTVAL (index) & 3) == 0);
7020 /* We have no such constraint on double mode offsets, so we permit the
7021 full range of the instruction format. */
7022 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7023 return (code == CONST_INT
7024 && INTVAL (index) < 1024
7025 && INTVAL (index) > -1024
7026 && (INTVAL (index) & 3) == 0);
7028 if (arm_address_register_rtx_p (index, strict_p)
7029 && (GET_MODE_SIZE (mode) <= 4))
7030 return 1;
7032 if (mode == DImode || mode == DFmode)
7034 if (code == CONST_INT)
7036 HOST_WIDE_INT val = INTVAL (index);
7037 /* ??? Can we assume ldrd for thumb2? */
7038 /* Thumb-2 ldrd only has reg+const addressing modes. */
7039 /* ldrd supports offsets of +-1020.
7040 However the ldr fallback does not. */
7041 return val > -256 && val < 256 && (val & 3) == 0;
7043 else
7044 return 0;
7047 if (code == MULT)
7049 rtx xiop0 = XEXP (index, 0);
7050 rtx xiop1 = XEXP (index, 1);
7052 return ((arm_address_register_rtx_p (xiop0, strict_p)
7053 && thumb2_index_mul_operand (xiop1))
7054 || (arm_address_register_rtx_p (xiop1, strict_p)
7055 && thumb2_index_mul_operand (xiop0)));
7057 else if (code == ASHIFT)
7059 rtx op = XEXP (index, 1);
7061 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7062 && CONST_INT_P (op)
7063 && INTVAL (op) > 0
7064 && INTVAL (op) <= 3);
7067 return (code == CONST_INT
7068 && INTVAL (index) < 4096
7069 && INTVAL (index) > -256);
7072 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7073 static int
7074 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7076 int regno;
7078 if (!REG_P (x))
7079 return 0;
7081 regno = REGNO (x);
7083 if (strict_p)
7084 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7086 return (regno <= LAST_LO_REGNUM
7087 || regno > LAST_VIRTUAL_REGISTER
7088 || regno == FRAME_POINTER_REGNUM
7089 || (GET_MODE_SIZE (mode) >= 4
7090 && (regno == STACK_POINTER_REGNUM
7091 || regno >= FIRST_PSEUDO_REGISTER
7092 || x == hard_frame_pointer_rtx
7093 || x == arg_pointer_rtx)));
7096 /* Return nonzero if x is a legitimate index register. This is the case
7097 for any base register that can access a QImode object. */
7098 inline static int
7099 thumb1_index_register_rtx_p (rtx x, int strict_p)
7101 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7104 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7106 The AP may be eliminated to either the SP or the FP, so we use the
7107 least common denominator, e.g. SImode, and offsets from 0 to 64.
7109 ??? Verify whether the above is the right approach.
7111 ??? Also, the FP may be eliminated to the SP, so perhaps that
7112 needs special handling also.
7114 ??? Look at how the mips16 port solves this problem. It probably uses
7115 better ways to solve some of these problems.
7117 Although it is not incorrect, we don't accept QImode and HImode
7118 addresses based on the frame pointer or arg pointer until the
7119 reload pass starts. This is so that eliminating such addresses
7120 into stack based ones won't produce impossible code. */
7122 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7124 /* ??? Not clear if this is right. Experiment. */
7125 if (GET_MODE_SIZE (mode) < 4
7126 && !(reload_in_progress || reload_completed)
7127 && (reg_mentioned_p (frame_pointer_rtx, x)
7128 || reg_mentioned_p (arg_pointer_rtx, x)
7129 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7130 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7131 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7132 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7133 return 0;
7135 /* Accept any base register. SP only in SImode or larger. */
7136 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7137 return 1;
7139 /* This is PC relative data before arm_reorg runs. */
7140 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7141 && GET_CODE (x) == SYMBOL_REF
7142 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7143 return 1;
7145 /* This is PC relative data after arm_reorg runs. */
7146 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7147 && reload_completed
7148 && (GET_CODE (x) == LABEL_REF
7149 || (GET_CODE (x) == CONST
7150 && GET_CODE (XEXP (x, 0)) == PLUS
7151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7152 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7153 return 1;
7155 /* Post-inc indexing only supported for SImode and larger. */
7156 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7157 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7158 return 1;
7160 else if (GET_CODE (x) == PLUS)
7162 /* REG+REG address can be any two index registers. */
7163 /* We disallow FRAME+REG addressing since we know that FRAME
7164 will be replaced with STACK, and SP relative addressing only
7165 permits SP+OFFSET. */
7166 if (GET_MODE_SIZE (mode) <= 4
7167 && XEXP (x, 0) != frame_pointer_rtx
7168 && XEXP (x, 1) != frame_pointer_rtx
7169 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7170 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7171 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7172 return 1;
7174 /* REG+const has 5-7 bit offset for non-SP registers. */
7175 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7176 || XEXP (x, 0) == arg_pointer_rtx)
7177 && CONST_INT_P (XEXP (x, 1))
7178 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7179 return 1;
7181 /* REG+const has 10-bit offset for SP, but only SImode and
7182 larger are supported. */
7183 /* ??? Should probably check for DI/DFmode overflow here
7184 just like GO_IF_LEGITIMATE_OFFSET does. */
7185 else if (REG_P (XEXP (x, 0))
7186 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7187 && GET_MODE_SIZE (mode) >= 4
7188 && CONST_INT_P (XEXP (x, 1))
7189 && INTVAL (XEXP (x, 1)) >= 0
7190 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7192 return 1;
7194 else if (REG_P (XEXP (x, 0))
7195 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7196 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7197 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7198 && REGNO (XEXP (x, 0))
7199 <= LAST_VIRTUAL_POINTER_REGISTER))
7200 && GET_MODE_SIZE (mode) >= 4
7201 && CONST_INT_P (XEXP (x, 1))
7202 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7203 return 1;
7206 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7207 && GET_MODE_SIZE (mode) == 4
7208 && GET_CODE (x) == SYMBOL_REF
7209 && CONSTANT_POOL_ADDRESS_P (x)
7210 && ! (flag_pic
7211 && symbol_mentioned_p (get_pool_constant (x))
7212 && ! pcrel_constant_p (get_pool_constant (x))))
7213 return 1;
7215 return 0;
7218 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7219 instruction of mode MODE. */
7221 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7223 switch (GET_MODE_SIZE (mode))
7225 case 1:
7226 return val >= 0 && val < 32;
7228 case 2:
7229 return val >= 0 && val < 64 && (val & 1) == 0;
7231 default:
7232 return (val >= 0
7233 && (val + GET_MODE_SIZE (mode)) <= 128
7234 && (val & 3) == 0);
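/* The ranges above work out as follows (derived from the code; VAL is
   a byte offset):

       byte accesses:        0 <= VAL <= 31
       halfword accesses:    0 <= VAL <= 62, VAL even
       word and larger:      0 <= VAL <= 128 - size, VAL a multiple of 4  */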
7238 bool
7239 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7241 if (TARGET_ARM)
7242 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7243 else if (TARGET_THUMB2)
7244 return thumb2_legitimate_address_p (mode, x, strict_p);
7245 else /* if (TARGET_THUMB1) */
7246 return thumb1_legitimate_address_p (mode, x, strict_p);
7249 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7251 Given an rtx X being reloaded into a reg required to be
7252 in class CLASS, return the class of reg to actually use.
7253 In general this is just CLASS, but for the Thumb core registers and
7254 immediate constants we prefer a LO_REGS class or a subset. */
7256 static reg_class_t
7257 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7259 if (TARGET_32BIT)
7260 return rclass;
7261 else
7263 if (rclass == GENERAL_REGS)
7264 return LO_REGS;
7265 else
7266 return rclass;
7270 /* Build the SYMBOL_REF for __tls_get_addr. */
7272 static GTY(()) rtx tls_get_addr_libfunc;
7274 static rtx
7275 get_tls_get_addr (void)
7277 if (!tls_get_addr_libfunc)
7278 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7279 return tls_get_addr_libfunc;
7283 arm_load_tp (rtx target)
7285 if (!target)
7286 target = gen_reg_rtx (SImode);
7288 if (TARGET_HARD_TP)
7290 /* Can return in any reg. */
7291 emit_insn (gen_load_tp_hard (target));
7293 else
7295 /* Always returned in r0. Immediately copy the result into a pseudo,
7296 otherwise other uses of r0 (e.g. setting up function arguments) may
7297 clobber the value. */
7299 rtx tmp;
7301 emit_insn (gen_load_tp_soft ());
7303 tmp = gen_rtx_REG (SImode, 0);
7304 emit_move_insn (target, tmp);
7306 return target;
7309 static rtx
7310 load_tls_operand (rtx x, rtx reg)
7312 rtx tmp;
7314 if (reg == NULL_RTX)
7315 reg = gen_reg_rtx (SImode);
7317 tmp = gen_rtx_CONST (SImode, x);
7319 emit_move_insn (reg, tmp);
7321 return reg;
7324 static rtx
7325 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7327 rtx insns, label, labelno, sum;
7329 gcc_assert (reloc != TLS_DESCSEQ);
7330 start_sequence ();
7332 labelno = GEN_INT (pic_labelno++);
7333 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7334 label = gen_rtx_CONST (VOIDmode, label);
7336 sum = gen_rtx_UNSPEC (Pmode,
7337 gen_rtvec (4, x, GEN_INT (reloc), label,
7338 GEN_INT (TARGET_ARM ? 8 : 4)),
7339 UNSPEC_TLS);
7340 reg = load_tls_operand (sum, reg);
7342 if (TARGET_ARM)
7343 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7344 else
7345 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7347 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7348 LCT_PURE, /* LCT_CONST? */
7349 Pmode, 1, reg, Pmode);
7351 insns = get_insns ();
7352 end_sequence ();
7354 return insns;
7357 static rtx
7358 arm_tls_descseq_addr (rtx x, rtx reg)
7360 rtx labelno = GEN_INT (pic_labelno++);
7361 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7362 rtx sum = gen_rtx_UNSPEC (Pmode,
7363 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7364 gen_rtx_CONST (VOIDmode, label),
7365 GEN_INT (!TARGET_ARM)),
7366 UNSPEC_TLS);
7367 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7369 emit_insn (gen_tlscall (x, labelno));
7370 if (!reg)
7371 reg = gen_reg_rtx (SImode);
7372 else
7373 gcc_assert (REGNO (reg) != 0);
7375 emit_move_insn (reg, reg0);
7377 return reg;
7381 legitimize_tls_address (rtx x, rtx reg)
7383 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7384 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7386 switch (model)
7388 case TLS_MODEL_GLOBAL_DYNAMIC:
7389 if (TARGET_GNU2_TLS)
7391 reg = arm_tls_descseq_addr (x, reg);
7393 tp = arm_load_tp (NULL_RTX);
7395 dest = gen_rtx_PLUS (Pmode, tp, reg);
7397 else
7399 /* Original scheme */
7400 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7401 dest = gen_reg_rtx (Pmode);
7402 emit_libcall_block (insns, dest, ret, x);
7404 return dest;
7406 case TLS_MODEL_LOCAL_DYNAMIC:
7407 if (TARGET_GNU2_TLS)
7409 reg = arm_tls_descseq_addr (x, reg);
7411 tp = arm_load_tp (NULL_RTX);
7413 dest = gen_rtx_PLUS (Pmode, tp, reg);
7415 else
7417 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7419 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7420 share the LDM result with other LD model accesses. */
7421 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7422 UNSPEC_TLS);
7423 dest = gen_reg_rtx (Pmode);
7424 emit_libcall_block (insns, dest, ret, eqv);
7426 /* Load the addend. */
7427 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7428 GEN_INT (TLS_LDO32)),
7429 UNSPEC_TLS);
7430 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7431 dest = gen_rtx_PLUS (Pmode, dest, addend);
7433 return dest;
7435 case TLS_MODEL_INITIAL_EXEC:
7436 labelno = GEN_INT (pic_labelno++);
7437 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7438 label = gen_rtx_CONST (VOIDmode, label);
7439 sum = gen_rtx_UNSPEC (Pmode,
7440 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7441 GEN_INT (TARGET_ARM ? 8 : 4)),
7442 UNSPEC_TLS);
7443 reg = load_tls_operand (sum, reg);
7445 if (TARGET_ARM)
7446 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7447 else if (TARGET_THUMB2)
7448 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7449 else
7451 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7452 emit_move_insn (reg, gen_const_mem (SImode, reg));
7455 tp = arm_load_tp (NULL_RTX);
7457 return gen_rtx_PLUS (Pmode, tp, reg);
7459 case TLS_MODEL_LOCAL_EXEC:
7460 tp = arm_load_tp (NULL_RTX);
7462 reg = gen_rtx_UNSPEC (Pmode,
7463 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7464 UNSPEC_TLS);
7465 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7467 return gen_rtx_PLUS (Pmode, tp, reg);
7469 default:
7470 abort ();
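/* Summary of the cases above: global- and local-dynamic either use the
   GNU2 descriptor sequence followed by a thread-pointer add, or fall back
   to a __tls_get_addr libcall (local-dynamic additionally adds the LDO
   addend); initial-exec loads the symbol's offset from memory and adds
   the thread pointer; local-exec simply adds a TPOFF constant to the
   thread pointer.  */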
7474 /* Try machine-dependent ways of modifying an illegitimate address
7475 to be legitimate. If we find one, return the new, valid address. */
7477 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7479 if (arm_tls_referenced_p (x))
7481 rtx addend = NULL;
7483 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7485 addend = XEXP (XEXP (x, 0), 1);
7486 x = XEXP (XEXP (x, 0), 0);
7489 if (GET_CODE (x) != SYMBOL_REF)
7490 return x;
7492 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7494 x = legitimize_tls_address (x, NULL_RTX);
7496 if (addend)
7498 x = gen_rtx_PLUS (SImode, x, addend);
7499 orig_x = x;
7501 else
7502 return x;
7505 if (!TARGET_ARM)
7507 /* TODO: legitimize_address for Thumb2. */
7508 if (TARGET_THUMB2)
7509 return x;
7510 return thumb_legitimize_address (x, orig_x, mode);
7513 if (GET_CODE (x) == PLUS)
7515 rtx xop0 = XEXP (x, 0);
7516 rtx xop1 = XEXP (x, 1);
7518 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7519 xop0 = force_reg (SImode, xop0);
7521 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7522 && !symbol_mentioned_p (xop1))
7523 xop1 = force_reg (SImode, xop1);
7525 if (ARM_BASE_REGISTER_RTX_P (xop0)
7526 && CONST_INT_P (xop1))
7528 HOST_WIDE_INT n, low_n;
7529 rtx base_reg, val;
7530 n = INTVAL (xop1);
7532 /* VFP addressing modes actually allow greater offsets, but for
7533 now we just stick with the lowest common denominator. */
7534 if (mode == DImode
7535 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7537 low_n = n & 0x0f;
7538 n &= ~0x0f;
7539 if (low_n > 4)
7541 n += 16;
7542 low_n -= 16;
7545 else
7547 low_n = ((mode) == TImode ? 0
7548 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7549 n -= low_n;
7552 base_reg = gen_reg_rtx (SImode);
7553 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7554 emit_move_insn (base_reg, val);
7555 x = plus_constant (Pmode, base_reg, low_n);
7557 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7558 x = gen_rtx_PLUS (SImode, xop0, xop1);
7561 /* XXX We don't allow MINUS any more -- see comment in
7562 arm_legitimate_address_outer_p (). */
7563 else if (GET_CODE (x) == MINUS)
7565 rtx xop0 = XEXP (x, 0);
7566 rtx xop1 = XEXP (x, 1);
7568 if (CONSTANT_P (xop0))
7569 xop0 = force_reg (SImode, xop0);
7571 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7572 xop1 = force_reg (SImode, xop1);
7574 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7575 x = gen_rtx_MINUS (SImode, xop0, xop1);
7578 /* Make sure to take full advantage of the pre-indexed addressing mode
7579 with absolute addresses, which often allows the base register to be
7580 factored out across multiple adjacent memory references, and it might
7581 even allow the minipool to be avoided entirely. */
7582 else if (CONST_INT_P (x) && optimize > 0)
7584 unsigned int bits;
7585 HOST_WIDE_INT mask, base, index;
7586 rtx base_reg;
7588 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7589 use an 8-bit index. So let's use a 12-bit index for SImode only and
7590 hope that arm_gen_constant will enable ldrb to use more bits. */
7591 bits = (mode == SImode) ? 12 : 8;
7592 mask = (1 << bits) - 1;
7593 base = INTVAL (x) & ~mask;
7594 index = INTVAL (x) & mask;
7595 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7597 /* It'll most probably be more efficient to generate the base
7598 with more bits set and use a negative index instead. */
7599 base |= mask;
7600 index -= mask;
7602 base_reg = force_reg (SImode, GEN_INT (base));
7603 x = plus_constant (Pmode, base_reg, index);
7606 if (flag_pic)
7608 /* We need to find and carefully transform any SYMBOL and LABEL
7609 references, so go back to the original address expression. */
7610 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7612 if (new_x != orig_x)
7613 x = new_x;
7616 return x;
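/* Illustrative example (hypothetical address): legitimizing the constant
   address 0x20003004 for an SImode access takes the CONST_INT branch
   above with bits == 12, splitting it into base == 0x20003000 and
   index == 4; only three bits are set in the base, so it is left alone,
   forced into a register, and the access becomes [base_reg, #4], letting
   adjacent references share base_reg.  */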
7620 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7621 to be legitimate. If we find one, return the new, valid address. */
7623 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7625 if (GET_CODE (x) == PLUS
7626 && CONST_INT_P (XEXP (x, 1))
7627 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7628 || INTVAL (XEXP (x, 1)) < 0))
7630 rtx xop0 = XEXP (x, 0);
7631 rtx xop1 = XEXP (x, 1);
7632 HOST_WIDE_INT offset = INTVAL (xop1);
7634 /* Try to fold the offset into a biasing of the base register and
7635 then offsetting that. Don't do this when optimizing for space
7636 since it can cause too many CSEs. */
7637 if (optimize_size && offset >= 0
7638 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7640 HOST_WIDE_INT delta;
7642 if (offset >= 256)
7643 delta = offset - (256 - GET_MODE_SIZE (mode));
7644 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7645 delta = 31 * GET_MODE_SIZE (mode);
7646 else
7647 delta = offset & (~31 * GET_MODE_SIZE (mode));
7649 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7650 NULL_RTX);
7651 x = plus_constant (Pmode, xop0, delta);
7653 else if (offset < 0 && offset > -256)
7654 /* Small negative offsets are best done with a subtract before the
7655 dereference, since forcing these into a register normally takes two
7656 instructions. */
7657 x = force_operand (x, NULL_RTX);
7658 else
7660 /* For the remaining cases, force the constant into a register. */
7661 xop1 = force_reg (SImode, xop1);
7662 x = gen_rtx_PLUS (SImode, xop0, xop1);
7665 else if (GET_CODE (x) == PLUS
7666 && s_register_operand (XEXP (x, 1), SImode)
7667 && !s_register_operand (XEXP (x, 0), SImode))
7669 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7671 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7674 if (flag_pic)
7676 /* We need to find and carefully transform any SYMBOL and LABEL
7677 references, so go back to the original address expression. */
7678 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7680 if (new_x != orig_x)
7681 x = new_x;
7684 return x;
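/* Illustrative example (hypothetical offset): for an SImode access at
   base + 300 when optimizing for size, the offset is >= 256, so
   delta = 300 - (256 - 4) = 48; the base is biased by 252 and the final
   address becomes [biased_base, #48], both of which Thumb-1 can encode
   directly.  */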
7687 bool
7688 arm_legitimize_reload_address (rtx *p,
7689 enum machine_mode mode,
7690 int opnum, int type,
7691 int ind_levels ATTRIBUTE_UNUSED)
7693 /* We must recognize output that we have already generated ourselves. */
7694 if (GET_CODE (*p) == PLUS
7695 && GET_CODE (XEXP (*p, 0)) == PLUS
7696 && REG_P (XEXP (XEXP (*p, 0), 0))
7697 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7698 && CONST_INT_P (XEXP (*p, 1)))
7700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7701 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7702 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7703 return true;
7706 if (GET_CODE (*p) == PLUS
7707 && REG_P (XEXP (*p, 0))
7708 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7709 /* If the base register is equivalent to a constant, let the generic
7710 code handle it. Otherwise we will run into problems if a future
7711 reload pass decides to rematerialize the constant. */
7712 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7713 && CONST_INT_P (XEXP (*p, 1)))
7715 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7716 HOST_WIDE_INT low, high;
7718 /* Detect coprocessor load/stores. */
7719 bool coproc_p = ((TARGET_HARD_FLOAT
7720 && TARGET_VFP
7721 && (mode == SFmode || mode == DFmode))
7722 || (TARGET_REALLY_IWMMXT
7723 && VALID_IWMMXT_REG_MODE (mode))
7724 || (TARGET_NEON
7725 && (VALID_NEON_DREG_MODE (mode)
7726 || VALID_NEON_QREG_MODE (mode))));
7728 /* For some conditions, bail out when lower two bits are unaligned. */
7729 if ((val & 0x3) != 0
7730 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7731 && (coproc_p
7732 /* For DI, and DF under soft-float: */
7733 || ((mode == DImode || mode == DFmode)
7734 /* Without ldrd, we use stm/ldm, which does not
7735 fare well with unaligned bits. */
7736 && (! TARGET_LDRD
7737 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7738 || TARGET_THUMB2))))
7739 return false;
7741 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7742 where the (reg+high) part gets turned into a reload add insn,
7743 we try to decompose the index into high/low values that can often
7744 also lead to better reload CSE.
7745 For example:
7746 ldr r0, [r2, #4100] // Offset too large
7747 ldr r1, [r2, #4104] // Offset too large
7749 is best reloaded as:
7750 add t1, r2, #4096
7751 ldr r0, [t1, #4]
7752 add t2, r2, #4096
7753 ldr r1, [t2, #8]
7755 which post-reload CSE can simplify in most cases to eliminate the
7756 second add instruction:
7757 add t1, r2, #4096
7758 ldr r0, [t1, #4]
7759 ldr r1, [t1, #8]
7761 The idea here is that we want to split out the bits of the constant
7762 as a mask, rather than by subtracting the maximum offset that the
7763 respective type of load/store can handle.
7765 A negative low part can still be useful even when the overall offset
7766 is positive; sometimes this leads to an immediate that can be
7767 constructed with fewer instructions.
7768 For example:
7769 ldr r0, [r2, #0x3FFFFC]
7771 This is best reloaded as:
7772 add t1, r2, #0x400000
7773 ldr r0, [t1, #-4]
7775 The trick for spotting this for a load insn with N bits of offset
7776 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7777 negative offset that is going to make bit N and all the bits below
7778 it become zero in the remainder part.
7780 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7781 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7782 used in most cases of ARM load/store instructions. */
7784 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7785 (((VAL) & ((1 << (N)) - 1)) \
7786 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7787 : 0)
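/* For illustration: with the 0x3FFFFC offset from the example above and
   N == 12, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
     = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
     = (0x1FFC ^ 0x1000) - 0x1000
     = -4,
   leaving a high part of 0x400000, i.e. exactly the
     add t1, r2, #0x400000 / ldr r0, [t1, #-4]
   sequence shown above.  When bit N is clear the macro simply returns the
   low bits unchanged, e.g. SIGN_MAG_LOW_ADDR_BITS (0x204, 8) == 4.  */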
7789 if (coproc_p)
7791 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7793 /* NEON quad-word load/stores are made of two double-word accesses,
7794 so the valid index range is reduced by 8. Treat as 9-bit range if
7795 we go over it. */
7796 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7797 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7799 else if (GET_MODE_SIZE (mode) == 8)
7801 if (TARGET_LDRD)
7802 low = (TARGET_THUMB2
7803 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7804 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7805 else
7806 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7807 to access doublewords. The supported load/store offsets are
7808 -8, -4, and 4, which we try to produce here. */
7809 low = ((val & 0xf) ^ 0x8) - 0x8;
7811 else if (GET_MODE_SIZE (mode) < 8)
7813 /* NEON element load/stores do not have an offset. */
7814 if (TARGET_NEON_FP16 && mode == HFmode)
7815 return false;
7817 if (TARGET_THUMB2)
7819 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7820 Try the wider 12-bit range first, and re-try if the result
7821 is out of range. */
7822 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7823 if (low < -255)
7824 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7826 else
7828 if (mode == HImode || mode == HFmode)
7830 if (arm_arch4)
7831 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7832 else
7834 /* The storehi/movhi_bytes fallbacks can use only
7835 [-4094,+4094] of the full ldrb/strb index range. */
7836 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7837 if (low == 4095 || low == -4095)
7838 return false;
7841 else
7842 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7845 else
7846 return false;
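/* Illustrative example (hypothetical offset): for val == 0x1804 in SImode
   on Thumb-2, the first 12-bit split above yields -2044, which is below
   the -255 limit, so the code retries with 8 bits and settles on
   low == 4, leaving high == 0x1800.  */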
7848 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7849 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7850 - (unsigned HOST_WIDE_INT) 0x80000000);
7851 /* Check for overflow or zero */
7852 if (low == 0 || high == 0 || (high + low != val))
7853 return false;
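/* The XOR/subtract pair above is the usual idiom for sign-extending the
   32-bit value (val - low) into a HOST_WIDE_INT; for instance
   (0xFFFFF000 ^ 0x80000000) - 0x80000000 == -4096.  */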
7855 /* Reload the high part into a base reg; leave the low part
7856 in the mem.
7857 Note that replacing this gen_rtx_PLUS with plus_constant is
7858 wrong in this case because we rely on the
7859 (plus (plus reg c1) c2) structure being preserved so that
7860 XEXP (*p, 0) in push_reload below uses the correct term. */
7861 *p = gen_rtx_PLUS (GET_MODE (*p),
7862 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7863 GEN_INT (high)),
7864 GEN_INT (low));
7865 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7866 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7867 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7868 return true;
7871 return false;
7875 thumb_legitimize_reload_address (rtx *x_p,
7876 enum machine_mode mode,
7877 int opnum, int type,
7878 int ind_levels ATTRIBUTE_UNUSED)
7880 rtx x = *x_p;
7882 if (GET_CODE (x) == PLUS
7883 && GET_MODE_SIZE (mode) < 4
7884 && REG_P (XEXP (x, 0))
7885 && XEXP (x, 0) == stack_pointer_rtx
7886 && CONST_INT_P (XEXP (x, 1))
7887 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7889 rtx orig_x = x;
7891 x = copy_rtx (x);
7892 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7893 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7894 return x;
7897 /* If both registers are hi-regs, then it's better to reload the
7898 entire expression rather than each register individually. That
7899 only requires one reload register rather than two. */
7900 if (GET_CODE (x) == PLUS
7901 && REG_P (XEXP (x, 0))
7902 && REG_P (XEXP (x, 1))
7903 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7904 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7906 rtx orig_x = x;
7908 x = copy_rtx (x);
7909 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7910 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7911 return x;
7914 return NULL;
7917 /* Test for various thread-local symbols. */
7919 /* Helper for arm_tls_referenced_p. */
7921 static int
7922 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7924 if (GET_CODE (*x) == SYMBOL_REF)
7925 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7927 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7928 TLS offsets, not real symbol references. */
7929 if (GET_CODE (*x) == UNSPEC
7930 && XINT (*x, 1) == UNSPEC_TLS)
7931 return -1;
7933 return 0;
7936 /* Return TRUE if X contains any TLS symbol references. */
7938 bool
7939 arm_tls_referenced_p (rtx x)
7941 if (! TARGET_HAVE_TLS)
7942 return false;
7944 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7947 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7949 On the ARM, allow any integer (invalid ones are removed later by insn
7950 patterns), nice doubles and symbol_refs which refer to the function's
7951 constant pool XXX.
7953 When generating pic allow anything. */
7955 static bool
7956 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7958 /* At present, we have no support for Neon structure constants, so forbid
7959 them here. It might be possible to handle simple cases like 0 and -1
7960 in future. */
7961 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7962 return false;
7964 return flag_pic || !label_mentioned_p (x);
7967 static bool
7968 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7970 return (CONST_INT_P (x)
7971 || CONST_DOUBLE_P (x)
7972 || CONSTANT_ADDRESS_P (x)
7973 || flag_pic);
7976 static bool
7977 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7979 return (!arm_cannot_force_const_mem (mode, x)
7980 && (TARGET_32BIT
7981 ? arm_legitimate_constant_p_1 (mode, x)
7982 : thumb_legitimate_constant_p (mode, x)));
7985 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7987 static bool
7988 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7990 rtx base, offset;
7992 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7994 split_const (x, &base, &offset);
7995 if (GET_CODE (base) == SYMBOL_REF
7996 && !offset_within_block_p (base, INTVAL (offset)))
7997 return true;
7999 return arm_tls_referenced_p (x);
8002 #define REG_OR_SUBREG_REG(X) \
8003 (REG_P (X) \
8004 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8006 #define REG_OR_SUBREG_RTX(X) \
8007 (REG_P (X) ? (X) : SUBREG_REG (X))
8009 static inline int
8010 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8012 enum machine_mode mode = GET_MODE (x);
8013 int total, words;
8015 switch (code)
8017 case ASHIFT:
8018 case ASHIFTRT:
8019 case LSHIFTRT:
8020 case ROTATERT:
8021 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8023 case PLUS:
8024 case MINUS:
8025 case COMPARE:
8026 case NEG:
8027 case NOT:
8028 return COSTS_N_INSNS (1);
8030 case MULT:
8031 if (CONST_INT_P (XEXP (x, 1)))
8033 int cycles = 0;
8034 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8036 while (i)
8038 i >>= 2;
8039 cycles++;
8041 return COSTS_N_INSNS (2) + cycles;
8043 return COSTS_N_INSNS (1) + 16;
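/* Illustrative example: a multiply by the constant 100 (0x64) walks the
   loop above four times (0x64 -> 0x19 -> 0x6 -> 0x1 -> 0), giving an
   estimate of COSTS_N_INSNS (2) + 4.  */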
8045 case SET:
8046 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8047 the mode. */
8048 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8049 return (COSTS_N_INSNS (words)
8050 + 4 * ((MEM_P (SET_SRC (x)))
8051 + MEM_P (SET_DEST (x))));
8053 case CONST_INT:
8054 if (outer == SET)
8056 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8057 return 0;
8058 if (thumb_shiftable_const (INTVAL (x)))
8059 return COSTS_N_INSNS (2);
8060 return COSTS_N_INSNS (3);
8062 else if ((outer == PLUS || outer == COMPARE)
8063 && INTVAL (x) < 256 && INTVAL (x) > -256)
8064 return 0;
8065 else if ((outer == IOR || outer == XOR || outer == AND)
8066 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8067 return COSTS_N_INSNS (1);
8068 else if (outer == AND)
8070 int i;
8071 /* This duplicates the tests in the andsi3 expander. */
8072 for (i = 9; i <= 31; i++)
8073 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8074 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8075 return COSTS_N_INSNS (2);
8077 else if (outer == ASHIFT || outer == ASHIFTRT
8078 || outer == LSHIFTRT)
8079 return 0;
8080 return COSTS_N_INSNS (2);
8082 case CONST:
8083 case CONST_DOUBLE:
8084 case LABEL_REF:
8085 case SYMBOL_REF:
8086 return COSTS_N_INSNS (3);
8088 case UDIV:
8089 case UMOD:
8090 case DIV:
8091 case MOD:
8092 return 100;
8094 case TRUNCATE:
8095 return 99;
8097 case AND:
8098 case XOR:
8099 case IOR:
8100 /* XXX guess. */
8101 return 8;
8103 case MEM:
8104 /* XXX another guess. */
8105 /* Memory costs quite a lot for the first word, but subsequent words
8106 load at the equivalent of a single insn each. */
8107 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8108 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8109 ? 4 : 0));
8111 case IF_THEN_ELSE:
8112 /* XXX a guess. */
8113 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8114 return 14;
8115 return 2;
8117 case SIGN_EXTEND:
8118 case ZERO_EXTEND:
8119 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8120 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8122 if (mode == SImode)
8123 return total;
8125 if (arm_arch6)
8126 return total + COSTS_N_INSNS (1);
8128 /* Assume a two-shift sequence. Increase the cost slightly so
8129 we prefer actual shifts over an extend operation. */
8130 return total + 1 + COSTS_N_INSNS (2);
8132 default:
8133 return 99;
8137 static inline bool
8138 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8140 enum machine_mode mode = GET_MODE (x);
8141 enum rtx_code subcode;
8142 rtx operand;
8143 enum rtx_code code = GET_CODE (x);
8144 *total = 0;
8146 switch (code)
8148 case MEM:
8149 /* Memory costs quite a lot for the first word, but subsequent words
8150 load at the equivalent of a single insn each. */
8151 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8152 return true;
8154 case DIV:
8155 case MOD:
8156 case UDIV:
8157 case UMOD:
8158 if (TARGET_HARD_FLOAT && mode == SFmode)
8159 *total = COSTS_N_INSNS (2);
8160 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8161 *total = COSTS_N_INSNS (4);
8162 else
8163 *total = COSTS_N_INSNS (20);
8164 return false;
8166 case ROTATE:
8167 if (REG_P (XEXP (x, 1)))
8168 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8169 else if (!CONST_INT_P (XEXP (x, 1)))
8170 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8172 /* Fall through */
8173 case ROTATERT:
8174 if (mode != SImode)
8176 *total += COSTS_N_INSNS (4);
8177 return true;
8180 /* Fall through */
8181 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8182 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8183 if (mode == DImode)
8185 *total += COSTS_N_INSNS (3);
8186 return true;
8189 *total += COSTS_N_INSNS (1);
8190 /* Increase the cost of complex shifts because they aren't any faster,
8191 and they reduce dual-issue opportunities. */
8192 if (arm_tune_cortex_a9
8193 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8194 ++*total;
8196 return true;
8198 case MINUS:
8199 if (mode == DImode)
8201 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8202 if (CONST_INT_P (XEXP (x, 0))
8203 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8205 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8206 return true;
8209 if (CONST_INT_P (XEXP (x, 1))
8210 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8212 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8213 return true;
8216 return false;
8219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8221 if (TARGET_HARD_FLOAT
8222 && (mode == SFmode
8223 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8225 *total = COSTS_N_INSNS (1);
8226 if (CONST_DOUBLE_P (XEXP (x, 0))
8227 && arm_const_double_rtx (XEXP (x, 0)))
8229 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8230 return true;
8233 if (CONST_DOUBLE_P (XEXP (x, 1))
8234 && arm_const_double_rtx (XEXP (x, 1)))
8236 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8237 return true;
8240 return false;
8242 *total = COSTS_N_INSNS (20);
8243 return false;
8246 *total = COSTS_N_INSNS (1);
8247 if (CONST_INT_P (XEXP (x, 0))
8248 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8250 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8251 return true;
8254 subcode = GET_CODE (XEXP (x, 1));
8255 if (subcode == ASHIFT || subcode == ASHIFTRT
8256 || subcode == LSHIFTRT
8257 || subcode == ROTATE || subcode == ROTATERT)
8259 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8260 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8261 return true;
8264 /* A shift as a part of RSB costs no more than RSB itself. */
8265 if (GET_CODE (XEXP (x, 0)) == MULT
8266 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8268 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8269 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8270 return true;
8273 if (subcode == MULT
8274 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8276 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8277 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8278 return true;
8281 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8282 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8284 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8285 if (REG_P (XEXP (XEXP (x, 1), 0))
8286 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8287 *total += COSTS_N_INSNS (1);
8289 return true;
8292 /* Fall through */
8294 case PLUS:
8295 if (code == PLUS && arm_arch6 && mode == SImode
8296 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8297 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8299 *total = COSTS_N_INSNS (1);
8300 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8301 0, speed);
8302 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8303 return true;
8306 /* MLA: All arguments must be registers. We filter out
8307 multiplication by a power of two, so that we fall down into
8308 the code below. */
8309 if (GET_CODE (XEXP (x, 0)) == MULT
8310 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8312 /* The cost comes from the cost of the multiply. */
8313 return false;
8316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8318 if (TARGET_HARD_FLOAT
8319 && (mode == SFmode
8320 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8322 *total = COSTS_N_INSNS (1);
8323 if (CONST_DOUBLE_P (XEXP (x, 1))
8324 && arm_const_double_rtx (XEXP (x, 1)))
8326 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8327 return true;
8330 return false;
8333 *total = COSTS_N_INSNS (20);
8334 return false;
8337 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8338 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8340 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8341 if (REG_P (XEXP (XEXP (x, 0), 0))
8342 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8343 *total += COSTS_N_INSNS (1);
8344 return true;
8347 /* Fall through */
8349 case AND: case XOR: case IOR:
8351 /* Normally the frame registers will be split into reg+const during
8352 reload, so it is a bad idea to combine them with other instructions,
8353 since then they might not be moved outside of loops. As a compromise
8354 we allow integration with ops that have a constant as their second
8355 operand. */
8356 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8357 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8358 && !CONST_INT_P (XEXP (x, 1)))
8359 *total = COSTS_N_INSNS (1);
8361 if (mode == DImode)
8363 *total += COSTS_N_INSNS (2);
8364 if (CONST_INT_P (XEXP (x, 1))
8365 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8367 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8368 return true;
8371 return false;
8374 *total += COSTS_N_INSNS (1);
8375 if (CONST_INT_P (XEXP (x, 1))
8376 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8378 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8379 return true;
8381 subcode = GET_CODE (XEXP (x, 0));
8382 if (subcode == ASHIFT || subcode == ASHIFTRT
8383 || subcode == LSHIFTRT
8384 || subcode == ROTATE || subcode == ROTATERT)
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8388 return true;
8391 if (subcode == MULT
8392 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8394 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8396 return true;
8399 if (subcode == UMIN || subcode == UMAX
8400 || subcode == SMIN || subcode == SMAX)
8402 *total = COSTS_N_INSNS (3);
8403 return true;
8406 return false;
8408 case MULT:
8409 /* This should have been handled by the CPU specific routines. */
8410 gcc_unreachable ();
8412 case TRUNCATE:
8413 if (arm_arch3m && mode == SImode
8414 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8416 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8417 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8418 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8419 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8421 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8422 return true;
8424 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8425 return false;
8427 case NEG:
8428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8430 if (TARGET_HARD_FLOAT
8431 && (mode == SFmode
8432 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8434 *total = COSTS_N_INSNS (1);
8435 return false;
8437 *total = COSTS_N_INSNS (2);
8438 return false;
8441 /* Fall through */
8442 case NOT:
8443 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8444 if (mode == SImode && code == NOT)
8446 subcode = GET_CODE (XEXP (x, 0));
8447 if (subcode == ASHIFT || subcode == ASHIFTRT
8448 || subcode == LSHIFTRT
8449 || subcode == ROTATE || subcode == ROTATERT
8450 || (subcode == MULT
8451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8453 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8454 /* Register shifts cost an extra cycle. */
8455 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8456 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8457 subcode, 1, speed);
8458 return true;
8462 return false;
8464 case IF_THEN_ELSE:
8465 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8467 *total = COSTS_N_INSNS (4);
8468 return true;
8471 operand = XEXP (x, 0);
8473 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8475 && REG_P (XEXP (operand, 0))
8476 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8477 *total += COSTS_N_INSNS (1);
8478 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8479 + rtx_cost (XEXP (x, 2), code, 2, speed));
8480 return true;
8482 case NE:
8483 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8485 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8486 return true;
8488 goto scc_insn;
8490 case GE:
8491 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8492 && mode == SImode && XEXP (x, 1) == const0_rtx)
8494 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8495 return true;
8497 goto scc_insn;
8499 case LT:
8500 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8501 && mode == SImode && XEXP (x, 1) == const0_rtx)
8503 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8504 return true;
8506 goto scc_insn;
8508 case EQ:
8509 case GT:
8510 case LE:
8511 case GEU:
8512 case LTU:
8513 case GTU:
8514 case LEU:
8515 case UNORDERED:
8516 case ORDERED:
8517 case UNEQ:
8518 case UNGE:
8519 case UNLT:
8520 case UNGT:
8521 case UNLE:
8522 scc_insn:
8523 /* SCC insns. In the case where the comparison has already been
8524 performed, then they cost 2 instructions. Otherwise they need
8525 an additional comparison before them. */
8526 *total = COSTS_N_INSNS (2);
8527 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8529 return true;
8532 /* Fall through */
8533 case COMPARE:
8534 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8536 *total = 0;
8537 return true;
8540 *total += COSTS_N_INSNS (1);
8541 if (CONST_INT_P (XEXP (x, 1))
8542 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8544 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8545 return true;
8548 subcode = GET_CODE (XEXP (x, 0));
8549 if (subcode == ASHIFT || subcode == ASHIFTRT
8550 || subcode == LSHIFTRT
8551 || subcode == ROTATE || subcode == ROTATERT)
8553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8555 return true;
8558 if (subcode == MULT
8559 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8561 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8562 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8563 return true;
8566 return false;
8568 case UMIN:
8569 case UMAX:
8570 case SMIN:
8571 case SMAX:
8572 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8573 if (!CONST_INT_P (XEXP (x, 1))
8574 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8575 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8576 return true;
8578 case ABS:
8579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8581 if (TARGET_HARD_FLOAT
8582 && (mode == SFmode
8583 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8585 *total = COSTS_N_INSNS (1);
8586 return false;
8588 *total = COSTS_N_INSNS (20);
8589 return false;
8591 *total = COSTS_N_INSNS (1);
8592 if (mode == DImode)
8593 *total += COSTS_N_INSNS (3);
8594 return false;
8596 case SIGN_EXTEND:
8597 case ZERO_EXTEND:
8598 *total = 0;
8599 if (GET_MODE_CLASS (mode) == MODE_INT)
8601 rtx op = XEXP (x, 0);
8602 enum machine_mode opmode = GET_MODE (op);
8604 if (mode == DImode)
8605 *total += COSTS_N_INSNS (1);
8607 if (opmode != SImode)
8609 if (MEM_P (op))
8611 /* If !arm_arch4, we use one of the extendhisi2_mem
8612 or movhi_bytes patterns for HImode. For a QImode
8613 sign extension, we first zero-extend from memory
8614 and then perform a shift sequence. */
8615 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8616 *total += COSTS_N_INSNS (2);
8618 else if (arm_arch6)
8619 *total += COSTS_N_INSNS (1);
8621 /* We don't have the necessary insn, so we need to perform some
8622 other operation. */
8623 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8624 /* An and with constant 255. */
8625 *total += COSTS_N_INSNS (1);
8626 else
8627 /* A shift sequence. Increase costs slightly to avoid
8628 combining two shifts into an extend operation. */
8629 *total += COSTS_N_INSNS (2) + 1;
8632 return false;
8635 switch (GET_MODE (XEXP (x, 0)))
8637 case V8QImode:
8638 case V4HImode:
8639 case V2SImode:
8640 case V4QImode:
8641 case V2HImode:
8642 *total = COSTS_N_INSNS (1);
8643 return false;
8645 default:
8646 gcc_unreachable ();
8648 gcc_unreachable ();
8650 case ZERO_EXTRACT:
8651 case SIGN_EXTRACT:
8652 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8653 return true;
8655 case CONST_INT:
8656 if (const_ok_for_arm (INTVAL (x))
8657 || const_ok_for_arm (~INTVAL (x)))
8658 *total = COSTS_N_INSNS (1);
8659 else
8660 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8661 INTVAL (x), NULL_RTX,
8662 NULL_RTX, 0, 0));
8663 return true;
8665 case CONST:
8666 case LABEL_REF:
8667 case SYMBOL_REF:
8668 *total = COSTS_N_INSNS (3);
8669 return true;
8671 case HIGH:
8672 *total = COSTS_N_INSNS (1);
8673 return true;
8675 case LO_SUM:
8676 *total = COSTS_N_INSNS (1);
8677 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8678 return true;
8680 case CONST_DOUBLE:
8681 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8682 && (mode == SFmode || !TARGET_VFP_SINGLE))
8683 *total = COSTS_N_INSNS (1);
8684 else
8685 *total = COSTS_N_INSNS (4);
8686 return true;
8688 case SET:
8689 /* The vec_extract patterns accept memory operands that require an
8690 address reload. Account for the cost of that reload to give the
8691 auto-inc-dec pass an incentive to try to replace them. */
8692 if (TARGET_NEON && MEM_P (SET_DEST (x))
8693 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8695 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8696 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8697 *total += COSTS_N_INSNS (1);
8698 return true;
8700 /* Likewise for the vec_set patterns. */
8701 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8702 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8703 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8705 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8706 *total = rtx_cost (mem, code, 0, speed);
8707 if (!neon_vector_mem_operand (mem, 2, true))
8708 *total += COSTS_N_INSNS (1);
8709 return true;
8711 return false;
8713 case UNSPEC:
8714 /* We cost this the same as our memory cost so that it can be
8715 hoisted out of loops. */
8716 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8718 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8720 return true;
8722 case CONST_VECTOR:
8723 if (TARGET_NEON
8724 && TARGET_HARD_FLOAT
8725 && outer == SET
8726 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8727 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8728 *total = COSTS_N_INSNS (1);
8729 else
8730 *total = COSTS_N_INSNS (4);
8731 return true;
8733 default:
8734 *total = COSTS_N_INSNS (4);
8735 return false;
8739 /* Estimates the size cost of thumb1 instructions.
8740 For now most of the code is copied from thumb1_rtx_costs. We need more
8741 fine-grained tuning when we have more related test cases. */
8742 static inline int
8743 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8745 enum machine_mode mode = GET_MODE (x);
8746 int words;
8748 switch (code)
8750 case ASHIFT:
8751 case ASHIFTRT:
8752 case LSHIFTRT:
8753 case ROTATERT:
8754 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8756 case PLUS:
8757 case MINUS:
8758 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8759 defined by RTL expansion, especially for the expansion of
8760 multiplication. */
8761 if ((GET_CODE (XEXP (x, 0)) == MULT
8762 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8763 || (GET_CODE (XEXP (x, 1)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8765 return COSTS_N_INSNS (2);
8766 /* Fall through on purpose for normal RTX. */
8767 case COMPARE:
8768 case NEG:
8769 case NOT:
8770 return COSTS_N_INSNS (1);
8772 case MULT:
8773 if (CONST_INT_P (XEXP (x, 1)))
8775 /* The Thumb-1 mul instruction can't operate on a constant; we must
8776 load it into a register first. */
8777 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8778 return COSTS_N_INSNS (1) + const_size;
8780 return COSTS_N_INSNS (1);
8782 case SET:
8783 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8784 the mode. */
8785 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8786 return (COSTS_N_INSNS (words)
8787 + 4 * ((MEM_P (SET_SRC (x)))
8788 + MEM_P (SET_DEST (x))));
8790 case CONST_INT:
8791 if (outer == SET)
8793 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8794 return COSTS_N_INSNS (1);
8795 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8796 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8797 return COSTS_N_INSNS (2);
8798 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8799 if (thumb_shiftable_const (INTVAL (x)))
8800 return COSTS_N_INSNS (2);
8801 return COSTS_N_INSNS (3);
8803 else if ((outer == PLUS || outer == COMPARE)
8804 && INTVAL (x) < 256 && INTVAL (x) > -256)
8805 return 0;
8806 else if ((outer == IOR || outer == XOR || outer == AND)
8807 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8808 return COSTS_N_INSNS (1);
8809 else if (outer == AND)
8811 int i;
8812 /* This duplicates the tests in the andsi3 expander. */
8813 for (i = 9; i <= 31; i++)
8814 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8815 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8816 return COSTS_N_INSNS (2);
8818 else if (outer == ASHIFT || outer == ASHIFTRT
8819 || outer == LSHIFTRT)
8820 return 0;
8821 return COSTS_N_INSNS (2);
8823 case CONST:
8824 case CONST_DOUBLE:
8825 case LABEL_REF:
8826 case SYMBOL_REF:
8827 return COSTS_N_INSNS (3);
8829 case UDIV:
8830 case UMOD:
8831 case DIV:
8832 case MOD:
8833 return 100;
8835 case TRUNCATE:
8836 return 99;
8838 case AND:
8839 case XOR:
8840 case IOR:
8841 /* XXX guess. */
8842 return 8;
8844 case MEM:
8845 /* XXX another guess. */
8846 /* Memory costs quite a lot for the first word, but subsequent words
8847 load at the equivalent of a single insn each. */
8848 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8849 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8850 ? 4 : 0));
8852 case IF_THEN_ELSE:
8853 /* XXX a guess. */
8854 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8855 return 14;
8856 return 2;
8858 case ZERO_EXTEND:
8859 /* XXX still guessing. */
8860 switch (GET_MODE (XEXP (x, 0)))
8862 case QImode:
8863 return (1 + (mode == DImode ? 4 : 0)
8864 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8866 case HImode:
8867 return (4 + (mode == DImode ? 4 : 0)
8868 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8870 case SImode:
8871 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8873 default:
8874 return 99;
8877 default:
8878 return 99;
8882 /* RTX costs when optimizing for size. */
8883 static bool
8884 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8885 int *total)
8887 enum machine_mode mode = GET_MODE (x);
8888 if (TARGET_THUMB1)
8890 *total = thumb1_size_rtx_costs (x, code, outer_code);
8891 return true;
8894 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8895 switch (code)
8897 case MEM:
8898 /* A memory access costs 1 insn if the mode is small, or the address is
8899 a single register, otherwise it costs one insn per word. */
8900 if (REG_P (XEXP (x, 0)))
8901 *total = COSTS_N_INSNS (1);
8902 else if (flag_pic
8903 && GET_CODE (XEXP (x, 0)) == PLUS
8904 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8905 /* This will be split into two instructions.
8906 See arm.md:calculate_pic_address. */
8907 *total = COSTS_N_INSNS (2);
8908 else
8909 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8910 return true;
8912 case DIV:
8913 case MOD:
8914 case UDIV:
8915 case UMOD:
8916 /* Needs a libcall, so it costs about this. */
8917 *total = COSTS_N_INSNS (2);
8918 return false;
8920 case ROTATE:
8921 if (mode == SImode && REG_P (XEXP (x, 1)))
8923 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8924 return true;
8926 /* Fall through */
8927 case ROTATERT:
8928 case ASHIFT:
8929 case LSHIFTRT:
8930 case ASHIFTRT:
8931 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8933 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8934 return true;
8936 else if (mode == SImode)
8938 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8939 /* Slightly disparage register shifts, but not by much. */
8940 if (!CONST_INT_P (XEXP (x, 1)))
8941 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8942 return true;
8945 /* Needs a libcall. */
8946 *total = COSTS_N_INSNS (2);
8947 return false;
8949 case MINUS:
8950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8951 && (mode == SFmode || !TARGET_VFP_SINGLE))
8953 *total = COSTS_N_INSNS (1);
8954 return false;
8957 if (mode == SImode)
8959 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8960 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8962 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8963 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8964 || subcode1 == ROTATE || subcode1 == ROTATERT
8965 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8966 || subcode1 == ASHIFTRT)
8968 /* It's just the cost of the two operands. */
8969 *total = 0;
8970 return false;
8973 *total = COSTS_N_INSNS (1);
8974 return false;
8977 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8978 return false;
8980 case PLUS:
8981 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8982 && (mode == SFmode || !TARGET_VFP_SINGLE))
8984 *total = COSTS_N_INSNS (1);
8985 return false;
8988 /* A shift as a part of ADD costs nothing. */
8989 if (GET_CODE (XEXP (x, 0)) == MULT
8990 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8992 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8994 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8995 return true;
8998 /* Fall through */
8999 case AND: case XOR: case IOR:
9000 if (mode == SImode)
9002 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9004 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9005 || subcode == LSHIFTRT || subcode == ASHIFTRT
9006 || (code == AND && subcode == NOT))
9008 /* It's just the cost of the two operands. */
9009 *total = 0;
9010 return false;
9014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9015 return false;
9017 case MULT:
9018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9019 return false;
9021 case NEG:
9022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9023 && (mode == SFmode || !TARGET_VFP_SINGLE))
9025 *total = COSTS_N_INSNS (1);
9026 return false;
9029 /* Fall through */
9030 case NOT:
9031 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9033 return false;
9035 case IF_THEN_ELSE:
9036 *total = 0;
9037 return false;
9039 case COMPARE:
9040 if (cc_register (XEXP (x, 0), VOIDmode))
9041 *total = 0;
9042 else
9043 *total = COSTS_N_INSNS (1);
9044 return false;
9046 case ABS:
9047 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9048 && (mode == SFmode || !TARGET_VFP_SINGLE))
9049 *total = COSTS_N_INSNS (1);
9050 else
9051 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9052 return false;
9054 case SIGN_EXTEND:
9055 case ZERO_EXTEND:
9056 return arm_rtx_costs_1 (x, outer_code, total, 0);
9058 case CONST_INT:
9059 if (const_ok_for_arm (INTVAL (x)))
9060 /* A multiplication by a constant requires another instruction
9061 to load the constant to a register. */
9062 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9063 ? 1 : 0);
9064 else if (const_ok_for_arm (~INTVAL (x)))
9065 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9066 else if (const_ok_for_arm (-INTVAL (x)))
9068 if (outer_code == COMPARE || outer_code == PLUS
9069 || outer_code == MINUS)
9070 *total = 0;
9071 else
9072 *total = COSTS_N_INSNS (1);
9074 else
9075 *total = COSTS_N_INSNS (2);
9076 return true;
9078 case CONST:
9079 case LABEL_REF:
9080 case SYMBOL_REF:
9081 *total = COSTS_N_INSNS (2);
9082 return true;
9084 case CONST_DOUBLE:
9085 *total = COSTS_N_INSNS (4);
9086 return true;
9088 case CONST_VECTOR:
9089 if (TARGET_NEON
9090 && TARGET_HARD_FLOAT
9091 && outer_code == SET
9092 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9093 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9094 *total = COSTS_N_INSNS (1);
9095 else
9096 *total = COSTS_N_INSNS (4);
9097 return true;
9099 case HIGH:
9100 case LO_SUM:
9101 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9102 cost of these slightly. */
9103 *total = COSTS_N_INSNS (1) + 1;
9104 return true;
9106 case SET:
9107 return false;
9109 default:
9110 if (mode != VOIDmode)
9111 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9112 else
9113 *total = COSTS_N_INSNS (4); /* Who knows? */
9114 return false;
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand holding the shift amount.
9121 Return NULL if OP is not a shifter operand. */
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9125 enum rtx_code code = GET_CODE (op);
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9140 return NULL;
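/* Illustrative examples (hypothetical operands): on (mult (reg A)
   (const_int 8)) this returns A and leaves *SHIFT_REG untouched, since 8
   is an exact power of two; on (ashift (reg A) (reg B)) it returns A and
   sets *SHIFT_REG to B; on anything else, e.g. (plus ...), it returns
   NULL.  */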
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 gcc_assert (GET_CODE (x) == UNSPEC);
9149 switch (XINT (x, 1))
9151 case UNSPEC_UNALIGNED_LOAD:
9152 /* We can only do unaligned loads into the integer unit, and we can't
9153 use LDM or LDRD. */
9154 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9155 if (speed_p)
9156 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9157 + extra_cost->ldst.load_unaligned);
9159 #ifdef NOT_YET
9160 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9161 ADDR_SPACE_GENERIC, speed_p);
9162 #endif
9163 return true;
9165 case UNSPEC_UNALIGNED_STORE:
9166 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9167 if (speed_p)
9168 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9169 + extra_cost->ldst.store_unaligned);
9171 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9172 #ifdef NOT_YET
9173 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9174 ADDR_SPACE_GENERIC, speed_p);
9175 #endif
9176 return true;
9178 case UNSPEC_VRINTZ:
9179 case UNSPEC_VRINTP:
9180 case UNSPEC_VRINTM:
9181 case UNSPEC_VRINTR:
9182 case UNSPEC_VRINTX:
9183 case UNSPEC_VRINTA:
9184 *cost = COSTS_N_INSNS (1);
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9193 return false;
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
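/* For illustration: LIBCALL_COST (2) expands (using the enclosing
   function's speed_p) to COSTS_N_INSNS (20) when costing for speed and
   COSTS_N_INSNS (4) at -Os, matching the description above of one insn
   per argument plus the call and the result.  */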
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9207 if (shift_reg) \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9213 else if (speed_p) \
9214 *cost += extra_cost->alu.arith_shift; \
9216 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9217 + rtx_cost (XEXP (x, 1 - IDX), \
9218 OP, 1, speed_p)); \
9219 return true; \
9222 while (0);
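/* Illustrative sketch (hypothetical invocation): for a narrow-mode
   x == (plus (ashift (reg A) (const_int 2)) (reg B)),
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) finds shift_op == A with no
   register-held shift amount, so (assuming arm_rtx_shift_left_p accepts
   the left shift) it adds extra_cost->alu.arith_shift when costing for
   speed, plus the costs of A and B, and returns true.  */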
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained with an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9230 costs.
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9237 static bool
9238 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9239 const struct cpu_cost_table *extra_cost,
9240 int *cost, bool speed_p)
9242 enum machine_mode mode = GET_MODE (x);
9244 if (TARGET_THUMB1)
9246 if (speed_p)
9247 *cost = thumb1_rtx_costs (x, code, outer_code);
9248 else
9249 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9250 return true;
9253 switch (code)
9255 case SET:
9256 *cost = 0;
9257 /* SET RTXs don't have a mode so we get it from the destination. */
9258 mode = GET_MODE (SET_DEST (x));
9260 if (REG_P (SET_SRC (x))
9261 && REG_P (SET_DEST (x)))
9263 /* Assume that most copies can be done with a single insn,
9264 unless we don't have HW FP, in which case everything
9265 larger than word mode will require two insns. */
9266 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9267 && GET_MODE_SIZE (mode) > 4)
9268 || mode == DImode)
9269 ? 2 : 1);
9270 /* Conditional register moves can be encoded
9271 in 16 bits in Thumb mode. */
9272 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9273 *cost >>= 1;
9275 return true;
9278 if (CONST_INT_P (SET_SRC (x)))
9280 /* Handle CONST_INT here, since the value doesn't have a mode
9281 and we would otherwise be unable to work out the true cost. */
9282 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9283 outer_code = SET;
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x))
9287 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9288 *cost -= 1;
9289 x = SET_SRC (x);
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9293 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9294 *cost >>= 1;
9295 goto const_int_cost;
9298 return false;
9300 case MEM:
9301 /* A memory access costs 1 insn if the mode is small, or the address is
9302 a single register, otherwise it costs one insn per word. */
9303 if (REG_P (XEXP (x, 0)))
9304 *cost = COSTS_N_INSNS (1);
9305 else if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost = COSTS_N_INSNS (2);
9311 else
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9316 if (speed_p)
9317 #ifdef NOT_YET
9318 *cost += (extra_cost->ldst.load
9319 + arm_address_cost (XEXP (x, 0), mode,
9320 ADDR_SPACE_GENERIC, speed_p));
9321 #else
9322 *cost += extra_cost->ldst.load;
9323 #endif
9324 return true;
9326 case PARALLEL:
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
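/* Worked example (the per-insn figures are hypothetical; real values come
   from the per-CPU cost tables): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, an 8-register LDM is costed as
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
	  == ldm_1st + COSTS_N_INSNS (3).  */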
9341 bool is_ldm = load_multiple_operation (x, SImode);
9342 bool is_stm = store_multiple_operation (x, SImode);
9344 *cost = COSTS_N_INSNS (1);
9346 if (is_ldm || is_stm)
9348 if (speed_p)
9350 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9351 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9352 ? extra_cost->ldst.ldm_regs_per_insn_1st
9353 : extra_cost->ldst.stm_regs_per_insn_1st;
9354 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9355 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9356 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9358 *cost += regs_per_insn_1st
9359 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9360 + regs_per_insn_sub - 1)
9361 / regs_per_insn_sub);
9362 return true;
9366 return false;
9368 case DIV:
9369 case UDIV:
9370 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9371 && (mode == SFmode || !TARGET_VFP_SINGLE))
9372 *cost = COSTS_N_INSNS (speed_p
9373 ? extra_cost->fp[mode != SFmode].div : 1);
9374 else if (mode == SImode && TARGET_IDIV)
9375 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9376 else
9377 *cost = LIBCALL_COST (2);
9378 return false; /* All arguments must be in registers. */
9380 case MOD:
9381 case UMOD:
9382 *cost = LIBCALL_COST (2);
9383 return false; /* All arguments must be in registers. */
9385 case ROTATE:
9386 if (mode == SImode && REG_P (XEXP (x, 1)))
9388 *cost = (COSTS_N_INSNS (2)
9389 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9390 if (speed_p)
9391 *cost += extra_cost->alu.shift_reg;
9392 return true;
9394 /* Fall through */
9395 case ROTATERT:
9396 case ASHIFT:
9397 case LSHIFTRT:
9398 case ASHIFTRT:
9399 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9401 *cost = (COSTS_N_INSNS (3)
9402 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9403 if (speed_p)
9404 *cost += 2 * extra_cost->alu.shift;
9405 return true;
9407 else if (mode == SImode)
9409 *cost = (COSTS_N_INSNS (1)
9410 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9411 /* Slightly disparage register shifts at -Os, but not by much. */
9412 if (!CONST_INT_P (XEXP (x, 1)))
9413 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9414 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9415 return true;
9417 else if (GET_MODE_CLASS (mode) == MODE_INT
9418 && GET_MODE_SIZE (mode) < 4)
9420 if (code == ASHIFT)
9422 *cost = (COSTS_N_INSNS (1)
9423 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9424 /* Slightly disparage register shifts at -Os, but not by
9425 much. */
9426 if (!CONST_INT_P (XEXP (x, 1)))
9427 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9428 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9430 else if (code == LSHIFTRT || code == ASHIFTRT)
9432 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9434 /* Can use SBFX/UBFX. */
9435 *cost = COSTS_N_INSNS (1);
9436 if (speed_p)
9437 *cost += extra_cost->alu.bfx;
9438 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9440 else
9442 *cost = COSTS_N_INSNS (2);
9443 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9444 if (speed_p)
9446 if (CONST_INT_P (XEXP (x, 1)))
9447 *cost += 2 * extra_cost->alu.shift;
9448 else
9449 *cost += (extra_cost->alu.shift
9450 + extra_cost->alu.shift_reg);
9452 else
9453 /* Slightly disparage register shifts. */
9454 *cost += !CONST_INT_P (XEXP (x, 1));
9457 else /* Rotates. */
9459 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9460 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9461 if (speed_p)
9463 if (CONST_INT_P (XEXP (x, 1)))
9464 *cost += (2 * extra_cost->alu.shift
9465 + extra_cost->alu.log_shift);
9466 else
9467 *cost += (extra_cost->alu.shift
9468 + extra_cost->alu.shift_reg
9469 + extra_cost->alu.log_shift_reg);
9472 return true;
9475 *cost = LIBCALL_COST (2);
9476 return false;
9478 case BSWAP:
9479 if (arm_arch6)
9481 if (mode == SImode)
9483 *cost = COSTS_N_INSNS (1);
9484 if (speed_p)
9485 *cost += extra_cost->alu.rev;
9487 return false;
9490 else
9492 /* No rev instruction available. Look at arm_legacy_rev
9493 and thumb_legacy_rev for the form of RTL used then. */
9494 if (TARGET_THUMB)
9496 *cost = COSTS_N_INSNS (10);
9498 if (speed_p)
9500 *cost += 6 * extra_cost->alu.shift;
9501 *cost += 3 * extra_cost->alu.logical;
9504 else
9506 *cost = COSTS_N_INSNS (5);
9508 if (speed_p)
9510 *cost += 2 * extra_cost->alu.shift;
9511 *cost += extra_cost->alu.arith_shift;
9512 *cost += 2 * extra_cost->alu.logical;
9515 return true;
9517 return false;
9519 case MINUS:
9520 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9521 && (mode == SFmode || !TARGET_VFP_SINGLE))
9523 *cost = COSTS_N_INSNS (1);
9524 if (GET_CODE (XEXP (x, 0)) == MULT
9525 || GET_CODE (XEXP (x, 1)) == MULT)
9527 rtx mul_op0, mul_op1, sub_op;
9529 if (speed_p)
9530 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9532 if (GET_CODE (XEXP (x, 0)) == MULT)
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 sub_op = XEXP (x, 1);
9538 else
9540 mul_op0 = XEXP (XEXP (x, 1), 0);
9541 mul_op1 = XEXP (XEXP (x, 1), 1);
9542 sub_op = XEXP (x, 0);
9545 /* The first operand of the multiply may be optionally
9546 negated. */
9547 if (GET_CODE (mul_op0) == NEG)
9548 mul_op0 = XEXP (mul_op0, 0);
9550 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9551 + rtx_cost (mul_op1, code, 0, speed_p)
9552 + rtx_cost (sub_op, code, 0, speed_p));
9554 return true;
9557 if (speed_p)
9558 *cost += extra_cost->fp[mode != SFmode].addsub;
9559 return false;
9562 if (mode == SImode)
9564 rtx shift_by_reg = NULL;
9565 rtx shift_op;
9566 rtx non_shift_op;
9568 *cost = COSTS_N_INSNS (1);
9570 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9571 if (shift_op == NULL)
9573 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9574 non_shift_op = XEXP (x, 0);
9576 else
9577 non_shift_op = XEXP (x, 1);
9579 if (shift_op != NULL)
9581 if (shift_by_reg != NULL)
9583 if (speed_p)
9584 *cost += extra_cost->alu.arith_shift_reg;
9585 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9587 else if (speed_p)
9588 *cost += extra_cost->alu.arith_shift;
9590 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9591 + rtx_cost (non_shift_op, code, 0, speed_p));
9592 return true;
9595 if (arm_arch_thumb2
9596 && GET_CODE (XEXP (x, 1)) == MULT)
9598 /* MLS. */
9599 if (speed_p)
9600 *cost += extra_cost->mult[0].add;
9601 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9602 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9603 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9604 return true;
9607 if (CONST_INT_P (XEXP (x, 0)))
9609 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9610 INTVAL (XEXP (x, 0)), NULL_RTX,
9611 NULL_RTX, 1, 0);
9612 *cost = COSTS_N_INSNS (insns);
9613 if (speed_p)
9614 *cost += insns * extra_cost->alu.arith;
9615 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9616 return true;
9619 return false;
9622 if (GET_MODE_CLASS (mode) == MODE_INT
9623 && GET_MODE_SIZE (mode) < 4)
9625 rtx shift_op, shift_reg;
9626 shift_reg = NULL;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost = 1 + COSTS_N_INSNS (1);
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 if (CONST_INT_P (XEXP (x, 0)))
9641 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9642 return true;
9645 return false;
9648 if (mode == DImode)
9650 *cost = COSTS_N_INSNS (2);
9652 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9654 rtx op1 = XEXP (x, 1);
9656 if (speed_p)
9657 *cost += 2 * extra_cost->alu.arith;
9659 if (GET_CODE (op1) == ZERO_EXTEND)
9660 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9661 else
9662 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9663 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9664 0, speed_p);
9665 return true;
9667 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9669 if (speed_p)
9670 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9671 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9672 0, speed_p)
9673 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9674 return true;
9676 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9677 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9679 if (speed_p)
9680 *cost += (extra_cost->alu.arith
9681 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9682 ? extra_cost->alu.arith
9683 : extra_cost->alu.arith_shift));
9684 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9685 + rtx_cost (XEXP (XEXP (x, 1), 0),
9686 GET_CODE (XEXP (x, 1)), 0, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += 2 * extra_cost->alu.arith;
9692 return false;
9695 /* Vector mode? */
9697 *cost = LIBCALL_COST (2);
9698 return false;
9700 case PLUS:
9701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9702 && (mode == SFmode || !TARGET_VFP_SINGLE))
9704 *cost = COSTS_N_INSNS (1);
9705 if (GET_CODE (XEXP (x, 0)) == MULT)
9707 rtx mul_op0, mul_op1, add_op;
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9712 mul_op0 = XEXP (XEXP (x, 0), 0);
9713 mul_op1 = XEXP (XEXP (x, 0), 1);
9714 add_op = XEXP (x, 1);
9716 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9717 + rtx_cost (mul_op1, code, 0, speed_p)
9718 + rtx_cost (add_op, code, 0, speed_p));
9720 return true;
9723 if (speed_p)
9724 *cost += extra_cost->fp[mode != SFmode].addsub;
9725 return false;
9727 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9729 *cost = LIBCALL_COST (2);
9730 return false;
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9736 narrow modes. */
9737 if (GET_MODE_CLASS (mode) == MODE_INT
9738 && GET_MODE_SIZE (mode) < 4)
9740 rtx shift_op, shift_reg;
9741 shift_reg = NULL;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9745 if (CONST_INT_P (XEXP (x, 1)))
9747 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 1)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 /* Slightly penalize a narrow operation as the result may
9754 need widening. */
9755 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9756 return true;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost = 1 + COSTS_N_INSNS (1);
9762 if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (mode == SImode)
9770 rtx shift_op, shift_reg;
9772 *cost = COSTS_N_INSNS (1);
9773 if (TARGET_INT_SIMD
9774 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9777 /* UXTA[BH] or SXTA[BH]. */
9778 if (speed_p)
9779 *cost += extra_cost->alu.extend_arith;
9780 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9781 speed_p)
9782 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9783 return true;
9786 shift_reg = NULL;
9787 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9788 if (shift_op != NULL)
9790 if (shift_reg)
9792 if (speed_p)
9793 *cost += extra_cost->alu.arith_shift_reg;
9794 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9796 else if (speed_p)
9797 *cost += extra_cost->alu.arith_shift;
9799 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9800 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9801 return true;
9803 if (GET_CODE (XEXP (x, 0)) == MULT)
9805 rtx mul_op = XEXP (x, 0);
9807 *cost = COSTS_N_INSNS (1);
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9832 return true;
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9840 return true;
9842 if (CONST_INT_P (XEXP (x, 1)))
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9851 return true;
9853 return false;
9856 if (mode == DImode)
9858 if (arm_arch3m
9859 && GET_CODE (XEXP (x, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9862 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9865 *cost = COSTS_N_INSNS (1);
9866 if (speed_p)
9867 *cost += extra_cost->mult[1].extend_add;
9868 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9869 ZERO_EXTEND, 0, speed_p)
9870 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9873 return true;
9876 *cost = COSTS_N_INSNS (2);
9878 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9879 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9881 if (speed_p)
9882 *cost += (extra_cost->alu.arith
9883 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9884 ? extra_cost->alu.arith
9885 : extra_cost->alu.arith_shift));
9887 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9888 speed_p)
9889 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9890 return true;
9893 if (speed_p)
9894 *cost += 2 * extra_cost->alu.arith;
9895 return false;
9898 /* Vector mode? */
9899 *cost = LIBCALL_COST (2);
9900 return false;
9901 case IOR:
9902 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9904 *cost = COSTS_N_INSNS (1);
9905 if (speed_p)
9906 *cost += extra_cost->alu.rev;
9908 return true;
9910 /* Fall through. */
9911 case AND: case XOR:
9912 if (mode == SImode)
9914 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9915 rtx op0 = XEXP (x, 0);
9916 rtx shift_op, shift_reg;
9918 *cost = COSTS_N_INSNS (1);
9920 if (subcode == NOT
9921 && (code == AND
9922 || (code == IOR && TARGET_THUMB2)))
9923 op0 = XEXP (op0, 0);
9925 shift_reg = NULL;
9926 shift_op = shifter_op_p (op0, &shift_reg);
9927 if (shift_op != NULL)
9929 if (shift_reg)
9931 if (speed_p)
9932 *cost += extra_cost->alu.log_shift_reg;
9933 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9935 else if (speed_p)
9936 *cost += extra_cost->alu.log_shift;
9938 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9939 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9940 return true;
9943 if (CONST_INT_P (XEXP (x, 1)))
9945 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9946 INTVAL (XEXP (x, 1)), NULL_RTX,
9947 NULL_RTX, 1, 0);
9949 *cost = COSTS_N_INSNS (insns);
9950 if (speed_p)
9951 *cost += insns * extra_cost->alu.logical;
9952 *cost += rtx_cost (op0, code, 0, speed_p);
9953 return true;
9956 if (speed_p)
9957 *cost += extra_cost->alu.logical;
9958 *cost += (rtx_cost (op0, code, 0, speed_p)
9959 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9960 return true;
9963 if (mode == DImode)
9965 rtx op0 = XEXP (x, 0);
9966 enum rtx_code subcode = GET_CODE (op0);
9968 *cost = COSTS_N_INSNS (2);
9970 if (subcode == NOT
9971 && (code == AND
9972 || (code == IOR && TARGET_THUMB2)))
9973 op0 = XEXP (op0, 0);
9975 if (GET_CODE (op0) == ZERO_EXTEND)
9977 if (speed_p)
9978 *cost += 2 * extra_cost->alu.logical;
9980 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9982 return true;
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9989 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9990 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9991 return true;
9994 if (speed_p)
9995 *cost += 2 * extra_cost->alu.logical;
9997 return true;
9999 /* Vector mode? */
10001 *cost = LIBCALL_COST (2);
10002 return false;
10004 case MULT:
10005 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10006 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 rtx op0 = XEXP (x, 0);
10010 *cost = COSTS_N_INSNS (1);
10012 if (GET_CODE (op0) == NEG)
10013 op0 = XEXP (op0, 0);
10015 if (speed_p)
10016 *cost += extra_cost->fp[mode != SFmode].mult;
10018 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10020 return true;
10022 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10024 *cost = LIBCALL_COST (2);
10025 return false;
10028 if (mode == SImode)
10030 *cost = COSTS_N_INSNS (1);
10031 if (TARGET_DSP_MULTIPLY
10032 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10033 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10034 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10035 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10036 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10037 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10039 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10040 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10041 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10042 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10043 && (INTVAL (XEXP (XEXP (x, 1), 1))
10044 == 16))))))
10046 /* SMUL[TB][TB]. */
10047 if (speed_p)
10048 *cost += extra_cost->mult[0].extend;
10049 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10050 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10051 return true;
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10058 if (mode == DImode)
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10066 *cost = COSTS_N_INSNS (1);
10067 if (speed_p)
10068 *cost += extra_cost->mult[1].extend;
10069 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10070 ZERO_EXTEND, 0, speed_p)
10071 + rtx_cost (XEXP (XEXP (x, 1), 0),
10072 ZERO_EXTEND, 0, speed_p));
10073 return true;
10076 *cost = LIBCALL_COST (2);
10077 return false;
10080 /* Vector mode? */
10081 *cost = LIBCALL_COST (2);
10082 return false;
10084 case NEG:
10085 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10086 && (mode == SFmode || !TARGET_VFP_SINGLE))
10088 *cost = COSTS_N_INSNS (1);
10089 if (speed_p)
10090 *cost += extra_cost->fp[mode != SFmode].neg;
10092 return false;
10094 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10096 *cost = LIBCALL_COST (1);
10097 return false;
10100 if (mode == SImode)
10102 if (GET_CODE (XEXP (x, 0)) == ABS)
10104 *cost = COSTS_N_INSNS (2);
10105 /* Assume the non-flag-changing variant. */
10106 if (speed_p)
10107 *cost += (extra_cost->alu.log_shift
10108 + extra_cost->alu.arith_shift);
10109 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10110 return true;
10113 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10114 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10116 *cost = COSTS_N_INSNS (2);
10117 /* No extra cost for MOV imm and MVN imm. */
10118 /* If the comparison op is using the flags, there's no further
10119 cost, otherwise we need to add the cost of the comparison. */
10120 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10121 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10122 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10124 *cost += (COSTS_N_INSNS (1)
10125 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10126 speed_p)
10127 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10128 speed_p));
10129 if (speed_p)
10130 *cost += extra_cost->alu.arith;
10132 return true;
10134 *cost = COSTS_N_INSNS (1);
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 return false;
10140 if (GET_MODE_CLASS (mode) == MODE_INT
10141 && GET_MODE_SIZE (mode) < 4)
10143 /* Slightly disparage, as we might need an extend operation. */
10144 *cost = 1 + COSTS_N_INSNS (1);
10145 if (speed_p)
10146 *cost += extra_cost->alu.arith;
10147 return false;
10150 if (mode == DImode)
10152 *cost = COSTS_N_INSNS (2);
10153 if (speed_p)
10154 *cost += 2 * extra_cost->alu.arith;
10155 return false;
10158 /* Vector mode? */
10159 *cost = LIBCALL_COST (1);
10160 return false;
10162 case NOT:
10163 if (mode == SImode)
10165 rtx shift_op;
10166 rtx shift_reg = NULL;
10168 *cost = COSTS_N_INSNS (1);
10169 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10171 if (shift_op)
10173 if (shift_reg != NULL)
10175 if (speed_p)
10176 *cost += extra_cost->alu.log_shift_reg;
10177 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10179 else if (speed_p)
10180 *cost += extra_cost->alu.log_shift;
10181 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10182 return true;
10185 if (speed_p)
10186 *cost += extra_cost->alu.logical;
10187 return false;
10189 if (mode == DImode)
10191 *cost = COSTS_N_INSNS (2);
10192 return false;
10195 /* Vector mode? */
10197 *cost += LIBCALL_COST (1);
10198 return false;
10200 case IF_THEN_ELSE:
10202 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10204 *cost = COSTS_N_INSNS (4);
10205 return true;
10207 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10208 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10210 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10211 /* Assume that if one arm of the if_then_else is a register,
10212 that it will be tied with the result and eliminate the
10213 conditional insn. */
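/* For example, for (if_then_else (cond) (reg) (const_int 0)) only the
   cost of the constant arm is added below, on the assumption that the
   register arm becomes the destination and needs no separate
   conditional move. */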
10214 if (REG_P (XEXP (x, 1)))
10215 *cost += op2cost;
10216 else if (REG_P (XEXP (x, 2)))
10217 *cost += op1cost;
10218 else
10220 if (speed_p)
10222 if (extra_cost->alu.non_exec_costs_exec)
10223 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10224 else
10225 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10227 else
10228 *cost += op1cost + op2cost;
10231 return true;
10233 case COMPARE:
10234 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10235 *cost = 0;
10236 else
10238 enum machine_mode op0mode;
10239 /* We'll mostly assume that the cost of a compare is the cost of the
10240 LHS. However, there are some notable exceptions. */
10242 /* Floating point compares are never done as side-effects. */
10243 op0mode = GET_MODE (XEXP (x, 0));
10244 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10245 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10247 *cost = COSTS_N_INSNS (1);
10248 if (speed_p)
10249 *cost += extra_cost->fp[op0mode != SFmode].compare;
10251 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10253 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10254 return true;
10257 return false;
10259 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10261 *cost = LIBCALL_COST (2);
10262 return false;
10265 /* DImode compares normally take two insns. */
10266 if (op0mode == DImode)
10268 *cost = COSTS_N_INSNS (2);
10269 if (speed_p)
10270 *cost += 2 * extra_cost->alu.arith;
10271 return false;
10274 if (op0mode == SImode)
10276 rtx shift_op;
10277 rtx shift_reg;
10279 if (XEXP (x, 1) == const0_rtx
10280 && !(REG_P (XEXP (x, 0))
10281 || (GET_CODE (XEXP (x, 0)) == SUBREG
10282 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10284 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10286 /* Multiply operations that set the flags are often
10287 significantly more expensive. */
10288 if (speed_p
10289 && GET_CODE (XEXP (x, 0)) == MULT
10290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10291 *cost += extra_cost->mult[0].flag_setting;
10293 if (speed_p
10294 && GET_CODE (XEXP (x, 0)) == PLUS
10295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10296 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10297 0), 1), mode))
10298 *cost += extra_cost->mult[0].flag_setting;
10299 return true;
10302 shift_reg = NULL;
10303 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10304 if (shift_op != NULL)
10306 *cost = COSTS_N_INSNS (1);
10307 if (shift_reg != NULL)
10309 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10310 if (speed_p)
10311 *cost += extra_cost->alu.arith_shift_reg;
10313 else if (speed_p)
10314 *cost += extra_cost->alu.arith_shift;
10315 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10316 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10317 return true;
10320 *cost = COSTS_N_INSNS (1);
10321 if (speed_p)
10322 *cost += extra_cost->alu.arith;
10323 if (CONST_INT_P (XEXP (x, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10326 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10327 return true;
10329 return false;
10332 /* Vector mode? */
10334 *cost = LIBCALL_COST (2);
10335 return false;
10337 return true;
10339 case EQ:
10340 case NE:
10341 case LT:
10342 case LE:
10343 case GT:
10344 case GE:
10345 case LTU:
10346 case LEU:
10347 case GEU:
10348 case GTU:
10349 case ORDERED:
10350 case UNORDERED:
10351 case UNEQ:
10352 case UNLE:
10353 case UNLT:
10354 case UNGE:
10355 case UNGT:
10356 case LTGT:
10357 if (outer_code == SET)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10361 && XEXP (x, 1) == const0_rtx)
10363 /* Thumb also needs an IT insn. */
10364 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10365 return true;
10367 if (XEXP (x, 1) == const0_rtx)
10369 switch (code)
10371 case LT:
10372 /* LSR Rd, Rn, #31. */
10373 *cost = COSTS_N_INSNS (1);
10374 if (speed_p)
10375 *cost += extra_cost->alu.shift;
10376 break;
10378 case EQ:
10379 /* RSBS T1, Rn, #0
10380 ADC Rd, Rn, T1. */
10382 case NE:
10383 /* SUBS T1, Rn, #1
10384 SBC Rd, Rn, T1. */
10385 *cost = COSTS_N_INSNS (2);
10386 break;
10388 case LE:
10389 /* RSBS T1, Rn, Rn, LSR #31
10390 ADC Rd, Rn, T1. */
10391 *cost = COSTS_N_INSNS (2);
10392 if (speed_p)
10393 *cost += extra_cost->alu.arith_shift;
10394 break;
10396 case GT:
10397 /* RSB Rd, Rn, Rn, ASR #1
10398 LSR Rd, Rd, #31. */
10399 *cost = COSTS_N_INSNS (2);
10400 if (speed_p)
10401 *cost += (extra_cost->alu.arith_shift
10402 + extra_cost->alu.shift);
10403 break;
10405 case GE:
10406 /* ASR Rd, Rn, #31
10407 ADD Rd, Rn, #1. */
10408 *cost = COSTS_N_INSNS (2);
10409 if (speed_p)
10410 *cost += extra_cost->alu.shift;
10411 break;
10413 default:
10414 /* Remaining cases are either meaningless or would take
10415 three insns anyway. */
10416 *cost = COSTS_N_INSNS (3);
10417 break;
10419 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10420 return true;
10422 else
10424 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10425 if (CONST_INT_P (XEXP (x, 1))
10426 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10428 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10429 return true;
10432 return false;
10435 /* Not directly inside a set. If it involves the condition code
10436 register it must be the condition for a branch, cond_exec or
10437 IF_THEN_ELSE operation. Since the comparison is performed elsewhere
10438 this is just the control part which has no additional
10439 cost. */
10440 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10441 && XEXP (x, 1) == const0_rtx)
10443 *cost = 0;
10444 return true;
10446 return false;
10448 case ABS:
10449 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10450 && (mode == SFmode || !TARGET_VFP_SINGLE))
10452 *cost = COSTS_N_INSNS (1);
10453 if (speed_p)
10454 *cost += extra_cost->fp[mode != SFmode].neg;
10456 return false;
10458 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 *cost = LIBCALL_COST (1);
10461 return false;
10464 if (mode == SImode)
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10479 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10481 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1);
10484 if (!speed_p)
10485 return true;
10487 if (GET_MODE (XEXP (x, 0)) == SImode)
10488 *cost += extra_cost->ldst.load;
10489 else
10490 *cost += extra_cost->ldst.load_sign_extend;
10492 if (mode == DImode)
10493 *cost += extra_cost->alu.shift;
10495 return true;
10498 /* Widening from less than 32-bits requires an extend operation. */
10499 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10501 /* We have SXTB/SXTH. */
10502 *cost = COSTS_N_INSNS (1);
10503 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10504 if (speed_p)
10505 *cost += extra_cost->alu.extend;
10507 else if (GET_MODE (XEXP (x, 0)) != SImode)
10509 /* Needs two shifts. */
10510 *cost = COSTS_N_INSNS (2);
10511 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10512 if (speed_p)
10513 *cost += 2 * extra_cost->alu.shift;
10516 /* Widening beyond 32-bits requires one more insn. */
10517 if (mode == DImode)
10519 *cost += COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10524 return true;
10526 case ZERO_EXTEND:
10527 if ((arm_arch4
10528 || GET_MODE (XEXP (x, 0)) == SImode
10529 || GET_MODE (XEXP (x, 0)) == QImode)
10530 && MEM_P (XEXP (x, 0)))
10532 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10534 if (mode == DImode)
10535 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10537 return true;
10540 /* Widening from less than 32-bits requires an extend operation. */
10541 if (GET_MODE (XEXP (x, 0)) == QImode)
10543 /* UXTB can be a shorter instruction in Thumb2, but it might
10544 be slower than the AND Rd, Rn, #255 alternative. When
10545 optimizing for speed it should never be slower to use
10546 AND, and we don't really model 16-bit vs 32-bit insns
10547 here. */
10548 *cost = COSTS_N_INSNS (1);
10549 if (speed_p)
10550 *cost += extra_cost->alu.logical;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10554 /* We have UXTB/UXTH. */
10555 *cost = COSTS_N_INSNS (1);
10556 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10557 if (speed_p)
10558 *cost += extra_cost->alu.extend;
10560 else if (GET_MODE (XEXP (x, 0)) != SImode)
10562 /* Needs two shifts. It's marginally preferable to use
10563 shifts rather than two BIC instructions as the second
10564 shift may merge with a subsequent insn as a shifter
10565 op. */
10566 *cost = COSTS_N_INSNS (2);
10567 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10568 if (speed_p)
10569 *cost += 2 * extra_cost->alu.shift;
10571 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10572 *cost = COSTS_N_INSNS (1);
10574 /* Widening beyond 32-bits requires one more insn. */
10575 if (mode == DImode)
10577 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10580 return true;
10582 case CONST_INT:
10583 *cost = 0;
10584 /* CONST_INT has no mode, so we cannot tell for sure how many
10585 insns are really going to be needed. The best we can do is
10586 look at the value passed. If it fits in SImode, then assume
10587 that's the mode it will be used for. Otherwise assume it
10588 will be used in DImode. */
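/* For example, 0x7fffffff survives truncation to SImode and is costed
   as a single SImode constant, whereas a value with bits above bit 31
   (e.g. (HOST_WIDE_INT) 1 << 32) does not and is costed below as two
   SImode constants, one per 32-bit half. */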
10589 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10590 mode = SImode;
10591 else
10592 mode = DImode;
10594 /* Avoid blowing up in arm_gen_constant (). */
10595 if (!(outer_code == PLUS
10596 || outer_code == AND
10597 || outer_code == IOR
10598 || outer_code == XOR
10599 || outer_code == MINUS))
10600 outer_code = SET;
10602 const_int_cost:
10603 if (mode == SImode)
10605 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10606 INTVAL (x), NULL, NULL,
10607 0, 0));
10608 /* Extra costs? */
10610 else
10612 *cost += COSTS_N_INSNS (arm_gen_constant
10613 (outer_code, SImode, NULL,
10614 trunc_int_for_mode (INTVAL (x), SImode),
10615 NULL, NULL, 0, 0)
10616 + arm_gen_constant (outer_code, SImode, NULL,
10617 INTVAL (x) >> 32, NULL,
10618 NULL, 0, 0));
10619 /* Extra costs? */
10622 return true;
10624 case CONST:
10625 case LABEL_REF:
10626 case SYMBOL_REF:
10627 if (speed_p)
10629 if (arm_arch_thumb2 && !flag_pic)
10630 *cost = COSTS_N_INSNS (2);
10631 else
10632 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10634 else
10635 *cost = COSTS_N_INSNS (2);
10637 if (flag_pic)
10639 *cost += COSTS_N_INSNS (1);
10640 if (speed_p)
10641 *cost += extra_cost->alu.arith;
10644 return true;
10646 case CONST_FIXED:
10647 *cost = COSTS_N_INSNS (4);
10648 /* Fixme. */
10649 return true;
10651 case CONST_DOUBLE:
10652 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10653 && (mode == SFmode || !TARGET_VFP_SINGLE))
10655 if (vfp3_const_double_rtx (x))
10657 *cost = COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->fp[mode == DFmode].fpconst;
10660 return true;
10663 if (speed_p)
10665 *cost = COSTS_N_INSNS (1);
10666 if (mode == DFmode)
10667 *cost += extra_cost->ldst.loadd;
10668 else
10669 *cost += extra_cost->ldst.loadf;
10671 else
10672 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10674 return true;
10676 *cost = COSTS_N_INSNS (4);
10677 return true;
10679 case CONST_VECTOR:
10680 /* Fixme. */
10681 if (TARGET_NEON
10682 && TARGET_HARD_FLOAT
10683 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10684 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10685 *cost = COSTS_N_INSNS (1);
10686 else
10687 *cost = COSTS_N_INSNS (4);
10688 return true;
10690 case HIGH:
10691 case LO_SUM:
10692 *cost = COSTS_N_INSNS (1);
10693 /* When optimizing for size, we prefer constant pool entries to
10694 MOVW/MOVT pairs, so bump the cost of these slightly. */
10695 if (!speed_p)
10696 *cost += 1;
10697 return true;
10699 case CLZ:
10700 *cost = COSTS_N_INSNS (1);
10701 if (speed_p)
10702 *cost += extra_cost->alu.clz;
10703 return false;
10705 case SMIN:
10706 if (XEXP (x, 1) == const0_rtx)
10708 *cost = COSTS_N_INSNS (1);
10709 if (speed_p)
10710 *cost += extra_cost->alu.log_shift;
10711 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10712 return true;
10714 /* Fall through. */
10715 case SMAX:
10716 case UMIN:
10717 case UMAX:
10718 *cost = COSTS_N_INSNS (2);
10719 return false;
10721 case TRUNCATE:
10722 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10723 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10724 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10726 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10727 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10728 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10729 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10730 == ZERO_EXTEND))))
10732 *cost = COSTS_N_INSNS (1);
10733 if (speed_p)
10734 *cost += extra_cost->mult[1].extend;
10735 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10736 speed_p)
10737 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10738 0, speed_p));
10739 return true;
10741 *cost = LIBCALL_COST (1);
10742 return false;
10744 case UNSPEC:
10745 return arm_unspec_cost (x, outer_code, speed_p, cost);
10747 case PC:
10748 /* Reading the PC is like reading any other register. Writing it
10749 is more expensive, but we take that into account elsewhere. */
10750 *cost = 0;
10751 return true;
10753 case ZERO_EXTRACT:
10754 /* TODO: Simple zero_extract of bottom bits using AND. */
10755 /* Fall through. */
10756 case SIGN_EXTRACT:
10757 if (arm_arch6
10758 && mode == SImode
10759 && CONST_INT_P (XEXP (x, 1))
10760 && CONST_INT_P (XEXP (x, 2)))
10762 *cost = COSTS_N_INSNS (1);
10763 if (speed_p)
10764 *cost += extra_cost->alu.bfx;
10765 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10766 return true;
10768 /* Without UBFX/SBFX, need to resort to shift operations. */
10769 *cost = COSTS_N_INSNS (2);
10770 if (speed_p)
10771 *cost += 2 * extra_cost->alu.shift;
10772 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10773 return true;
10775 case FLOAT_EXTEND:
10776 if (TARGET_HARD_FLOAT)
10778 *cost = COSTS_N_INSNS (1);
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].widen;
10781 if (!TARGET_FPU_ARMV8
10782 && GET_MODE (XEXP (x, 0)) == HFmode)
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].widen;
10790 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10791 return true;
10794 *cost = LIBCALL_COST (1);
10795 return false;
10797 case FLOAT_TRUNCATE:
10798 if (TARGET_HARD_FLOAT)
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->fp[mode == DFmode].narrow;
10803 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10804 return true;
10805 /* Vector modes? */
10807 *cost = LIBCALL_COST (1);
10808 return false;
10810 case FMA:
10811 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10813 rtx op0 = XEXP (x, 0);
10814 rtx op1 = XEXP (x, 1);
10815 rtx op2 = XEXP (x, 2);
10817 *cost = COSTS_N_INSNS (1);
10819 /* vfms or vfnma. */
10820 if (GET_CODE (op0) == NEG)
10821 op0 = XEXP (op0, 0);
10823 /* vfnms or vfnma. */
10824 if (GET_CODE (op2) == NEG)
10825 op2 = XEXP (op2, 0);
10827 *cost += rtx_cost (op0, FMA, 0, speed_p);
10828 *cost += rtx_cost (op1, FMA, 1, speed_p);
10829 *cost += rtx_cost (op2, FMA, 2, speed_p);
10831 if (speed_p)
10832 *cost += extra_cost->fp[mode == DFmode].fma;
10834 return true;
10837 *cost = LIBCALL_COST (3);
10838 return false;
10840 case FIX:
10841 case UNSIGNED_FIX:
10842 if (TARGET_HARD_FLOAT)
10844 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10849 /* Strip off the 'cost' of rounding towards zero. */
10850 if (GET_CODE (XEXP (x, 0)) == FIX)
10851 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10852 else
10853 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10854 /* ??? Increase the cost to deal with transferring from
10855 FP -> CORE registers? */
10856 return true;
10858 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10859 && TARGET_FPU_ARMV8)
10861 *cost = COSTS_N_INSNS (1);
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].roundint;
10864 return false;
10866 /* Vector costs? */
10868 *cost = LIBCALL_COST (1);
10869 return false;
10871 case FLOAT:
10872 case UNSIGNED_FLOAT:
10873 if (TARGET_HARD_FLOAT)
10875 /* ??? Increase the cost to deal with transferring from CORE
10876 -> FP registers? */
10877 *cost = COSTS_N_INSNS (1);
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10882 *cost = LIBCALL_COST (1);
10883 return false;
10885 case CALL:
10886 *cost = COSTS_N_INSNS (1);
10887 return true;
10889 case ASM_OPERANDS:
10891 /* Just a guess. Guess number of instructions in the asm
10892 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10893 though (see PR60663). */
10894 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10895 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10897 *cost = COSTS_N_INSNS (asm_length + num_operands);
10898 return true;
10900 default:
10901 if (mode != VOIDmode)
10902 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10903 else
10904 *cost = COSTS_N_INSNS (4); /* Who knows? */
10905 return false;
10909 #undef HANDLE_NARROW_SHIFT_ARITH
10911 /* RTX costs. Dispatch to the appropriate cost model, for speed or size. */
10912 static bool
10913 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10914 int *total, bool speed)
10916 bool result;
10918 if (TARGET_OLD_RTX_COSTS
10919 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10921 /* Old way. (Deprecated.) */
10922 if (!speed)
10923 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10924 (enum rtx_code) outer_code, total);
10925 else
10926 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10927 (enum rtx_code) outer_code, total,
10928 speed);
10930 else
10932 /* New way. */
10933 if (current_tune->insn_extra_cost)
10934 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10935 (enum rtx_code) outer_code,
10936 current_tune->insn_extra_cost,
10937 total, speed);
10938 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10939 && current_tune->insn_extra_cost == NULL */
10940 else
10941 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10942 (enum rtx_code) outer_code,
10943 &generic_extra_costs, total, speed);
10946 if (dump_file && (dump_flags & TDF_DETAILS))
10948 print_rtl_single (dump_file, x);
10949 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10950 *total, result ? "final" : "partial");
10952 return result;
10955 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10956 supported on any "slowmul" cores, so it can be ignored. */
10958 static bool
10959 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10960 int *total, bool speed)
10962 enum machine_mode mode = GET_MODE (x);
10964 if (TARGET_THUMB)
10966 *total = thumb1_rtx_costs (x, code, outer_code);
10967 return true;
10970 switch (code)
10972 case MULT:
10973 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10974 || mode == DImode)
10976 *total = COSTS_N_INSNS (20);
10977 return false;
10980 if (CONST_INT_P (XEXP (x, 1)))
10982 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10983 & (unsigned HOST_WIDE_INT) 0xffffffff);
10984 int cost, const_ok = const_ok_for_arm (i);
10985 int j, booth_unit_size;
10987 /* Tune as appropriate. */
10988 cost = const_ok ? 4 : 8;
10989 booth_unit_size = 2;
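/* For example, with booth_unit_size == 2 the loop below adds one insn
   per two significant bits of the constant: multiplying by 0x5 adds 2
   and by 0xff adds 4, giving costs of 4 + 2 == 6 and 4 + 4 == 8 insns
   respectively, since both are valid immediates (const_ok). */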
10990 for (j = 0; i && j < 32; j += booth_unit_size)
10992 i >>= booth_unit_size;
10993 cost++;
10996 *total = COSTS_N_INSNS (cost);
10997 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10998 return true;
11001 *total = COSTS_N_INSNS (20);
11002 return false;
11004 default:
11005 return arm_rtx_costs_1 (x, outer_code, total, speed);
11010 /* RTX cost for cores with a fast multiply unit (M variants). */
11012 static bool
11013 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11014 int *total, bool speed)
11016 enum machine_mode mode = GET_MODE (x);
11018 if (TARGET_THUMB1)
11020 *total = thumb1_rtx_costs (x, code, outer_code);
11021 return true;
11024 /* ??? should thumb2 use different costs? */
11025 switch (code)
11027 case MULT:
11028 /* There is no point basing this on the tuning, since it is always the
11029 fast variant if it exists at all. */
11030 if (mode == DImode
11031 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11032 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11033 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11035 *total = COSTS_N_INSNS (2);
11036 return false;
11040 if (mode == DImode)
11042 *total = COSTS_N_INSNS (5);
11043 return false;
11046 if (CONST_INT_P (XEXP (x, 1)))
11048 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11049 & (unsigned HOST_WIDE_INT) 0xffffffff);
11050 int cost, const_ok = const_ok_for_arm (i);
11051 int j, booth_unit_size;
11053 /* Tune as appropriate. */
11054 cost = const_ok ? 4 : 8;
11055 booth_unit_size = 8;
11056 for (j = 0; i && j < 32; j += booth_unit_size)
11058 i >>= booth_unit_size;
11059 cost++;
11062 *total = COSTS_N_INSNS (cost);
11063 return false;
11066 if (mode == SImode)
11068 *total = COSTS_N_INSNS (4);
11069 return false;
11072 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11074 if (TARGET_HARD_FLOAT
11075 && (mode == SFmode
11076 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11078 *total = COSTS_N_INSNS (1);
11079 return false;
11083 /* Requires a lib call */
11084 *total = COSTS_N_INSNS (20);
11085 return false;
11087 default:
11088 return arm_rtx_costs_1 (x, outer_code, total, speed);
11093 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11094 so it can be ignored. */
11096 static bool
11097 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11098 int *total, bool speed)
11100 enum machine_mode mode = GET_MODE (x);
11102 if (TARGET_THUMB)
11104 *total = thumb1_rtx_costs (x, code, outer_code);
11105 return true;
11108 switch (code)
11110 case COMPARE:
11111 if (GET_CODE (XEXP (x, 0)) != MULT)
11112 return arm_rtx_costs_1 (x, outer_code, total, speed);
11114 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11115 will stall until the multiplication is complete. */
11116 *total = COSTS_N_INSNS (3);
11117 return false;
11119 case MULT:
11120 /* There is no point basing this on the tuning, since it is always the
11121 fast variant if it exists at all. */
11122 if (mode == DImode
11123 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11124 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11125 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11127 *total = COSTS_N_INSNS (2);
11128 return false;
11132 if (mode == DImode)
11134 *total = COSTS_N_INSNS (5);
11135 return false;
11138 if (CONST_INT_P (XEXP (x, 1)))
11140 /* If operand 1 is a constant we can more accurately
11141 calculate the cost of the multiply. The multiplier can
11142 retire 15 bits on the first cycle and a further 12 on the
11143 second. We do, of course, have to load the constant into
11144 a register first. */
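/* Worked example using the masks below: 0x1234 fits in the first
   cycle's 15 bits, so the cost stays at 1; 0x123456 has bits in
   0xffff8000 and costs 2; 0x12345678 also has bits in 0xf8000000 and
   costs 3. Negative values are complemented first. */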
11145 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11146 /* There's a general overhead of one cycle. */
11147 int cost = 1;
11148 unsigned HOST_WIDE_INT masked_const;
11150 if (i & 0x80000000)
11151 i = ~i;
11153 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11155 masked_const = i & 0xffff8000;
11156 if (masked_const != 0)
11158 cost++;
11159 masked_const = i & 0xf8000000;
11160 if (masked_const != 0)
11161 cost++;
11163 *total = COSTS_N_INSNS (cost);
11164 return false;
11167 if (mode == SImode)
11169 *total = COSTS_N_INSNS (3);
11170 return false;
11173 /* Requires a lib call */
11174 *total = COSTS_N_INSNS (20);
11175 return false;
11177 default:
11178 return arm_rtx_costs_1 (x, outer_code, total, speed);
11183 /* RTX costs for 9e (and later) cores. */
11185 static bool
11186 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11187 int *total, bool speed)
11189 enum machine_mode mode = GET_MODE (x);
11191 if (TARGET_THUMB1)
11193 switch (code)
11195 case MULT:
11196 *total = COSTS_N_INSNS (3);
11197 return true;
11199 default:
11200 *total = thumb1_rtx_costs (x, code, outer_code);
11201 return true;
11205 switch (code)
11207 case MULT:
11208 /* There is no point basing this on the tuning, since it is always the
11209 fast variant if it exists at all. */
11210 if (mode == DImode
11211 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11212 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11213 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11215 *total = COSTS_N_INSNS (2);
11216 return false;
11220 if (mode == DImode)
11222 *total = COSTS_N_INSNS (5);
11223 return false;
11226 if (mode == SImode)
11228 *total = COSTS_N_INSNS (2);
11229 return false;
11232 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11234 if (TARGET_HARD_FLOAT
11235 && (mode == SFmode
11236 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11238 *total = COSTS_N_INSNS (1);
11239 return false;
11243 *total = COSTS_N_INSNS (20);
11244 return false;
11246 default:
11247 return arm_rtx_costs_1 (x, outer_code, total, speed);
11250 /* All address computations that can be done are free, but rtx cost returns
11251 the same for practically all of them. So we weight the different types
11252 of address here in the order (most pref first):
11253 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
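/* For example, a post-increment address costs 0, [reg, #imm] costs 2,
   [reg, reg, lsl #2] costs 3, a plain register costs 6 and a label,
   symbol or nested memory reference costs 10. */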
11254 static inline int
11255 arm_arm_address_cost (rtx x)
11257 enum rtx_code c = GET_CODE (x);
11259 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11260 return 0;
11261 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11262 return 10;
11264 if (c == PLUS)
11266 if (CONST_INT_P (XEXP (x, 1)))
11267 return 2;
11269 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11270 return 3;
11272 return 4;
11275 return 6;
11278 static inline int
11279 arm_thumb_address_cost (rtx x)
11281 enum rtx_code c = GET_CODE (x);
11283 if (c == REG)
11284 return 1;
11285 if (c == PLUS
11286 && REG_P (XEXP (x, 0))
11287 && CONST_INT_P (XEXP (x, 1)))
11288 return 1;
11290 return 2;
11293 static int
11294 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11295 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11297 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11300 /* Adjust cost hook for XScale. */
11301 static bool
11302 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11304 /* Some true dependencies can have a higher cost depending
11305 on precisely how certain input operands are used. */
11306 if (REG_NOTE_KIND(link) == 0
11307 && recog_memoized (insn) >= 0
11308 && recog_memoized (dep) >= 0)
11310 int shift_opnum = get_attr_shift (insn);
11311 enum attr_type attr_type = get_attr_type (dep);
11313 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11314 operand for INSN. If we have a shifted input operand and the
11315 instruction we depend on is another ALU instruction, then we may
11316 have to account for an additional stall. */
11317 if (shift_opnum != 0
11318 && (attr_type == TYPE_ALU_SHIFT_IMM
11319 || attr_type == TYPE_ALUS_SHIFT_IMM
11320 || attr_type == TYPE_LOGIC_SHIFT_IMM
11321 || attr_type == TYPE_LOGICS_SHIFT_IMM
11322 || attr_type == TYPE_ALU_SHIFT_REG
11323 || attr_type == TYPE_ALUS_SHIFT_REG
11324 || attr_type == TYPE_LOGIC_SHIFT_REG
11325 || attr_type == TYPE_LOGICS_SHIFT_REG
11326 || attr_type == TYPE_MOV_SHIFT
11327 || attr_type == TYPE_MVN_SHIFT
11328 || attr_type == TYPE_MOV_SHIFT_REG
11329 || attr_type == TYPE_MVN_SHIFT_REG))
11331 rtx shifted_operand;
11332 int opno;
11334 /* Get the shifted operand. */
11335 extract_insn (insn);
11336 shifted_operand = recog_data.operand[shift_opnum];
11338 /* Iterate over all the operands in DEP. If we write an operand
11339 that overlaps with SHIFTED_OPERAND, then we have to increase the
11340 cost of this dependency. */
11341 extract_insn (dep);
11342 preprocess_constraints ();
11343 for (opno = 0; opno < recog_data.n_operands; opno++)
11345 /* We can ignore strict inputs. */
11346 if (recog_data.operand_type[opno] == OP_IN)
11347 continue;
11349 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11350 shifted_operand))
11352 *cost = 2;
11353 return false;
11358 return true;
11361 /* Adjust cost hook for Cortex A9. */
11362 static bool
11363 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11365 switch (REG_NOTE_KIND (link))
11367 case REG_DEP_ANTI:
11368 *cost = 0;
11369 return false;
11371 case REG_DEP_TRUE:
11372 case REG_DEP_OUTPUT:
11373 if (recog_memoized (insn) >= 0
11374 && recog_memoized (dep) >= 0)
11376 if (GET_CODE (PATTERN (insn)) == SET)
11378 if (GET_MODE_CLASS
11379 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11380 || GET_MODE_CLASS
11381 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11383 enum attr_type attr_type_insn = get_attr_type (insn);
11384 enum attr_type attr_type_dep = get_attr_type (dep);
11386 /* By default all dependencies of the form
11387 s0 = s0 <op> s1
11388 s0 = s0 <op> s2
11389 have an extra latency of 1 cycle because
11390 of the input and output dependency in this
11391 case. However this gets modeled as a true
11392 dependency and hence all these checks. */
11393 if (REG_P (SET_DEST (PATTERN (insn)))
11394 && REG_P (SET_DEST (PATTERN (dep)))
11395 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11396 SET_DEST (PATTERN (dep))))
11398 /* FMACS is a special case where the dependent
11399 instruction can be issued 3 cycles before
11400 the normal latency in case of an output
11401 dependency. */
11402 if ((attr_type_insn == TYPE_FMACS
11403 || attr_type_insn == TYPE_FMACD)
11404 && (attr_type_dep == TYPE_FMACS
11405 || attr_type_dep == TYPE_FMACD))
11407 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11408 *cost = insn_default_latency (dep) - 3;
11409 else
11410 *cost = insn_default_latency (dep);
11411 return false;
11413 else
11415 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11416 *cost = insn_default_latency (dep) + 1;
11417 else
11418 *cost = insn_default_latency (dep);
11420 return false;
11425 break;
11427 default:
11428 gcc_unreachable ();
11431 return true;
11434 /* Adjust cost hook for FA726TE. */
11435 static bool
11436 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11438 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11439 has a penalty of 3. */
11440 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11441 && recog_memoized (insn) >= 0
11442 && recog_memoized (dep) >= 0
11443 && get_attr_conds (dep) == CONDS_SET)
11445 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11446 if (get_attr_conds (insn) == CONDS_USE
11447 && get_attr_type (insn) != TYPE_BRANCH)
11449 *cost = 3;
11450 return false;
11453 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11454 || get_attr_conds (insn) == CONDS_USE)
11456 *cost = 0;
11457 return false;
11461 return true;
11464 /* Implement TARGET_REGISTER_MOVE_COST.
11466 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11467 such a move is typically more expensive than a single memory access. We set
11468 the cost to less than two memory accesses so that floating
11469 point to integer conversion does not go through memory. */
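/* For example, with arm_memory_move_cost returning 10 on 32-bit
   targets, the cost of 15 below keeps a VFP<->core move cheaper than
   a store/load round trip (20). */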
11472 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11473 reg_class_t from, reg_class_t to)
11475 if (TARGET_32BIT)
11477 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11478 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11479 return 15;
11480 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11481 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11482 return 4;
11483 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11484 return 20;
11485 else
11486 return 2;
11488 else
11490 if (from == HI_REGS || to == HI_REGS)
11491 return 4;
11492 else
11493 return 2;
11497 /* Implement TARGET_MEMORY_MOVE_COST. */
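/* For example, on Thumb-1 an SImode access through LO_REGS costs
   2 * 4 * 1 == 8 (the same as a sub-word access), while the same
   access through any other class costs 16; 32-bit targets use a flat
   cost of 10. */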
11500 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11501 bool in ATTRIBUTE_UNUSED)
11503 if (TARGET_32BIT)
11504 return 10;
11505 else
11507 if (GET_MODE_SIZE (mode) < 4)
11508 return 8;
11509 else
11510 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11514 /* Vectorizer cost model implementation. */
11516 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11517 static int
11518 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11519 tree vectype,
11520 int misalign ATTRIBUTE_UNUSED)
11522 unsigned elements;
11524 switch (type_of_cost)
11526 case scalar_stmt:
11527 return current_tune->vec_costs->scalar_stmt_cost;
11529 case scalar_load:
11530 return current_tune->vec_costs->scalar_load_cost;
11532 case scalar_store:
11533 return current_tune->vec_costs->scalar_store_cost;
11535 case vector_stmt:
11536 return current_tune->vec_costs->vec_stmt_cost;
11538 case vector_load:
11539 return current_tune->vec_costs->vec_align_load_cost;
11541 case vector_store:
11542 return current_tune->vec_costs->vec_store_cost;
11544 case vec_to_scalar:
11545 return current_tune->vec_costs->vec_to_scalar_cost;
11547 case scalar_to_vec:
11548 return current_tune->vec_costs->scalar_to_vec_cost;
11550 case unaligned_load:
11551 return current_tune->vec_costs->vec_unalign_load_cost;
11553 case unaligned_store:
11554 return current_tune->vec_costs->vec_unalign_store_cost;
11556 case cond_branch_taken:
11557 return current_tune->vec_costs->cond_taken_branch_cost;
11559 case cond_branch_not_taken:
11560 return current_tune->vec_costs->cond_not_taken_branch_cost;
11562 case vec_perm:
11563 case vec_promote_demote:
11564 return current_tune->vec_costs->vec_stmt_cost;
11566 case vec_construct:
11567 elements = TYPE_VECTOR_SUBPARTS (vectype);
11568 return elements / 2 + 1;
11570 default:
11571 gcc_unreachable ();
11575 /* Implement targetm.vectorize.add_stmt_cost. */
11577 static unsigned
11578 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11579 struct _stmt_vec_info *stmt_info, int misalign,
11580 enum vect_cost_model_location where)
11582 unsigned *cost = (unsigned *) data;
11583 unsigned retval = 0;
11585 if (flag_vect_cost_model)
11587 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11588 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11590 /* Statements in an inner loop relative to the loop being
11591 vectorized are weighted more heavily. The value here is
11592 arbitrary and could potentially be improved with analysis. */
11593 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11594 count *= 50; /* FIXME. */
11596 retval = (unsigned) (count * stmt_cost);
11597 cost[where] += retval;
11600 return retval;
11603 /* Return true if and only if this insn can dual-issue only as older. */
11604 static bool
11605 cortexa7_older_only (rtx insn)
11607 if (recog_memoized (insn) < 0)
11608 return false;
11610 switch (get_attr_type (insn))
11612 case TYPE_ALU_REG:
11613 case TYPE_ALUS_REG:
11614 case TYPE_LOGIC_REG:
11615 case TYPE_LOGICS_REG:
11616 case TYPE_ADC_REG:
11617 case TYPE_ADCS_REG:
11618 case TYPE_ADR:
11619 case TYPE_BFM:
11620 case TYPE_REV:
11621 case TYPE_MVN_REG:
11622 case TYPE_SHIFT_IMM:
11623 case TYPE_SHIFT_REG:
11624 case TYPE_LOAD_BYTE:
11625 case TYPE_LOAD1:
11626 case TYPE_STORE1:
11627 case TYPE_FFARITHS:
11628 case TYPE_FADDS:
11629 case TYPE_FFARITHD:
11630 case TYPE_FADDD:
11631 case TYPE_FMOV:
11632 case TYPE_F_CVT:
11633 case TYPE_FCMPS:
11634 case TYPE_FCMPD:
11635 case TYPE_FCONSTS:
11636 case TYPE_FCONSTD:
11637 case TYPE_FMULS:
11638 case TYPE_FMACS:
11639 case TYPE_FMULD:
11640 case TYPE_FMACD:
11641 case TYPE_FDIVS:
11642 case TYPE_FDIVD:
11643 case TYPE_F_MRC:
11644 case TYPE_F_MRRC:
11645 case TYPE_F_FLAG:
11646 case TYPE_F_LOADS:
11647 case TYPE_F_STORES:
11648 return true;
11649 default:
11650 return false;
11654 /* Return true if and only if this insn can dual-issue as younger. */
11655 static bool
11656 cortexa7_younger (FILE *file, int verbose, rtx insn)
11658 if (recog_memoized (insn) < 0)
11660 if (verbose > 5)
11661 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11662 return false;
11665 switch (get_attr_type (insn))
11667 case TYPE_ALU_IMM:
11668 case TYPE_ALUS_IMM:
11669 case TYPE_LOGIC_IMM:
11670 case TYPE_LOGICS_IMM:
11671 case TYPE_EXTEND:
11672 case TYPE_MVN_IMM:
11673 case TYPE_MOV_IMM:
11674 case TYPE_MOV_REG:
11675 case TYPE_MOV_SHIFT:
11676 case TYPE_MOV_SHIFT_REG:
11677 case TYPE_BRANCH:
11678 case TYPE_CALL:
11679 return true;
11680 default:
11681 return false;
11686 /* Look for an instruction that can dual issue only as an older
11687 instruction, and move it in front of any instructions that can
11688 dual-issue as younger, while preserving the relative order of all
11689 other instructions in the ready list. This is a heuristic to help
11690 dual-issue in later cycles, by postponing issue of more flexible
11691 instructions. This heuristic may affect dual issue opportunities
11692 in the current cycle. */
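/* For example, if the ready list would issue a mov-immediate
   (younger-capable) ahead of an add-register (older-only), the add is
   hoisted in front of the mov so that the mov is still available to
   fill the younger slot of a later dual-issue pair. */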
11693 static void
11694 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11695 int clock)
11697 int i;
11698 int first_older_only = -1, first_younger = -1;
11700 if (verbose > 5)
11701 fprintf (file,
11702 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11703 clock,
11704 *n_readyp);
11706 /* Traverse the ready list from the head (the instruction to issue
11707 first), and looking for the first instruction that can issue as
11708 younger and the first instruction that can dual-issue only as
11709 older. */
11710 for (i = *n_readyp - 1; i >= 0; i--)
11712 rtx insn = ready[i];
11713 if (cortexa7_older_only (insn))
11715 first_older_only = i;
11716 if (verbose > 5)
11717 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11718 break;
11720 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11721 first_younger = i;
11724 /* Nothing to reorder: either no younger insn was found, or the insn
11725 that can dual-issue only as older already appears before any insn
11726 that can dual-issue as younger. */
11727 if (first_younger == -1)
11729 if (verbose > 5)
11730 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11731 return;
11734 /* Nothing to reorder because no older-only insn in the ready list. */
11735 if (first_older_only == -1)
11737 if (verbose > 5)
11738 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11739 return;
11742 /* Move first_older_only insn before first_younger. */
11743 if (verbose > 5)
11744 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11745 INSN_UID(ready [first_older_only]),
11746 INSN_UID(ready [first_younger]));
11747 rtx first_older_only_insn = ready [first_older_only];
11748 for (i = first_older_only; i < first_younger; i++)
11750 ready[i] = ready[i+1];
11753 ready[i] = first_older_only_insn;
11754 return;
11757 /* Implement TARGET_SCHED_REORDER. */
11758 static int
11759 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11760 int clock)
11762 switch (arm_tune)
11764 case cortexa7:
11765 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11766 break;
11767 default:
11768 /* Do nothing for other cores. */
11769 break;
11772 return arm_issue_rate ();
11775 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11776 It corrects the value of COST based on the relationship between
11777 INSN and DEP through the dependence LINK. It returns the new
11778 value. There is a per-core adjust_cost hook to adjust scheduler costs
11779 and the per-core hook can choose to completely override the generic
11780 adjust_cost function. Only put bits of code into arm_adjust_cost that
11781 are common across all cores. */
11782 static int
11783 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11785 rtx i_pat, d_pat;
11787 /* When generating Thumb-1 code, we want to place flag-setting operations
11788 close to a conditional branch which depends on them, so that we can
11789 omit the comparison. */
11790 if (TARGET_THUMB1
11791 && REG_NOTE_KIND (link) == 0
11792 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11793 && recog_memoized (dep) >= 0
11794 && get_attr_conds (dep) == CONDS_SET)
11795 return 0;
11797 if (current_tune->sched_adjust_cost != NULL)
11799 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11800 return cost;
11803 /* XXX Is this strictly true? */
11804 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11805 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11806 return 0;
11808 /* Call insns don't incur a stall, even if they follow a load. */
11809 if (REG_NOTE_KIND (link) == 0
11810 && CALL_P (insn))
11811 return 1;
11813 if ((i_pat = single_set (insn)) != NULL
11814 && MEM_P (SET_SRC (i_pat))
11815 && (d_pat = single_set (dep)) != NULL
11816 && MEM_P (SET_DEST (d_pat)))
11818 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11819 /* This is a load after a store; there is no conflict if the load reads
11820 from a cached area. Assume that loads from the stack and from the
11821 constant pool are cached, and that others will miss. This is a
11822 hack. */
11824 if ((GET_CODE (src_mem) == SYMBOL_REF
11825 && CONSTANT_POOL_ADDRESS_P (src_mem))
11826 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11827 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11828 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11829 return 1;
11832 return cost;
11836 arm_max_conditional_execute (void)
11838 return max_insns_skipped;
11841 static int
11842 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11844 if (TARGET_32BIT)
11845 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11846 else
11847 return (optimize > 0) ? 2 : 0;
11850 static int
11851 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11853 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11856 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11857 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11858 sequences of non-executed instructions in IT blocks probably take the same
11859 amount of time as executed instructions (and the IT instruction itself takes
11860 space in icache). This function was experimentally determined to give good
11861 results on a popular embedded benchmark. */
11863 static int
11864 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11866 return (TARGET_32BIT && speed_p) ? 1
11867 : arm_default_branch_cost (speed_p, predictable_p);
11870 static bool fp_consts_inited = false;
11872 static REAL_VALUE_TYPE value_fp0;
11874 static void
11875 init_fp_table (void)
11877 REAL_VALUE_TYPE r;
11879 r = REAL_VALUE_ATOF ("0", DFmode);
11880 value_fp0 = r;
11881 fp_consts_inited = true;
11884 /* Return TRUE if rtx X is a valid immediate FP constant. */
11886 arm_const_double_rtx (rtx x)
11888 REAL_VALUE_TYPE r;
11890 if (!fp_consts_inited)
11891 init_fp_table ();
11893 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11894 if (REAL_VALUE_MINUS_ZERO (r))
11895 return 0;
11897 if (REAL_VALUES_EQUAL (r, value_fp0))
11898 return 1;
11900 return 0;
11903 /* VFPv3 has a fairly wide range of representable immediates, formed from
11904 "quarter-precision" floating-point values. These can be evaluated using this
11905 formula (with ^ for exponentiation):
11907 -1^s * n * 2^-r
11909 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11910 16 <= n <= 31 and 0 <= r <= 7.
11912 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11914 - A (most-significant) is the sign bit.
11915 - BCD are the exponent (encoded as r XOR 3).
11916 - EFGH are the mantissa (encoded as n - 16).
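   For example, 1.0 can be written as 16 * 2^-4 (s = 0, n = 16, r = 4), so
   its index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, while
   -0.5 = -1 * 16 * 2^-5 gives (1 << 7) | ((5 ^ 3) << 4) | 0 = 0xe0.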
11919 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11920 fconst[sd] instruction, or -1 if X isn't suitable. */
11921 static int
11922 vfp3_const_double_index (rtx x)
11924 REAL_VALUE_TYPE r, m;
11925 int sign, exponent;
11926 unsigned HOST_WIDE_INT mantissa, mant_hi;
11927 unsigned HOST_WIDE_INT mask;
11928 HOST_WIDE_INT m1, m2;
11929 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11931 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11932 return -1;
11934 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11936 /* We can't represent these things, so detect them first. */
11937 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11938 return -1;
11940 /* Extract sign, exponent and mantissa. */
11941 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11942 r = real_value_abs (&r);
11943 exponent = REAL_EXP (&r);
11944 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11945 highest (sign) bit, with a fixed binary point at bit point_pos.
11946 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11947 bits for the mantissa, this may fail (low bits would be lost). */
11948 real_ldexp (&m, &r, point_pos - exponent);
11949 REAL_VALUE_TO_INT (&m1, &m2, m);
11950 mantissa = m1;
11951 mant_hi = m2;
11953 /* If there are bits set in the low part of the mantissa, we can't
11954 represent this value. */
11955 if (mantissa != 0)
11956 return -1;
11958 /* Now make it so that mantissa contains the most-significant bits, and move
11959 the point_pos to indicate that the least-significant bits have been
11960 discarded. */
11961 point_pos -= HOST_BITS_PER_WIDE_INT;
11962 mantissa = mant_hi;
11964 /* We can permit four significant bits of mantissa only, plus a high bit
11965 which is always 1. */
11966 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11967 if ((mantissa & mask) != 0)
11968 return -1;
11970 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11971 mantissa >>= point_pos - 5;
11973 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11974 floating-point immediate zero with Neon using an integer-zero load, but
11975 that case is handled elsewhere.) */
11976 if (mantissa == 0)
11977 return -1;
11979 gcc_assert (mantissa >= 16 && mantissa <= 31);
11981 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11982 normalized significands are in the range [1, 2). (Our mantissa is shifted
11983 left 4 places at this point relative to normalized IEEE754 values). GCC
11984 internally uses [0.5, 1) (see real.c), so the exponent returned from
11985 REAL_EXP must be altered. */
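   /* As a worked example: GCC represents 1.0 internally as 0.5 * 2^1, so
      REAL_EXP is 1 and the encoded exponent becomes 5 - 1 = 4, matching
      1.0 = 16 * 2^-4 in the quarter-precision form described above.  */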
11986 exponent = 5 - exponent;
11988 if (exponent < 0 || exponent > 7)
11989 return -1;
11991 /* Sign, mantissa and exponent are now in the correct form to plug into the
11992 formula described in the comment above. */
11993 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11996 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11998 vfp3_const_double_rtx (rtx x)
12000 if (!TARGET_VFP3)
12001 return 0;
12003 return vfp3_const_double_index (x) != -1;
12006 /* Recognize immediates which can be used in various Neon instructions. Legal
12007 immediates are described by the following table (for VMVN variants, the
12008 bitwise inverse of the constant shown is recognized; in either case, VMOV
12009 is output and the correct instruction to use for a given constant is chosen
12010 by the assembler). The constant shown is replicated across all elements of
12011 the destination vector.
12013 insn elems variant constant (binary)
12014 ---- ----- ------- -----------------
12015 vmov i32 0 00000000 00000000 00000000 abcdefgh
12016 vmov i32 1 00000000 00000000 abcdefgh 00000000
12017 vmov i32 2 00000000 abcdefgh 00000000 00000000
12018 vmov i32 3 abcdefgh 00000000 00000000 00000000
12019 vmov i16 4 00000000 abcdefgh
12020 vmov i16 5 abcdefgh 00000000
12021 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12022 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12023 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12024 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12025 vmvn i16 10 00000000 abcdefgh
12026 vmvn i16 11 abcdefgh 00000000
12027 vmov i32 12 00000000 00000000 abcdefgh 11111111
12028 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12029 vmov i32 14 00000000 abcdefgh 11111111 11111111
12030 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12031 vmov i8 16 abcdefgh
12032 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12033 eeeeeeee ffffffff gggggggg hhhhhhhh
12034 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12035 vmov f32 19 00000000 00000000 00000000 00000000
12037 For case 18, B = !b. Representable values are exactly those accepted by
12038 vfp3_const_double_index, but are output as floating-point numbers rather
12039 than indices.
12041 For case 19, we will change it to vmov.i32 when assembling.
12043 Variants 0-5 (inclusive) may also be used as immediates for the second
12044 operand of VORR/VBIC instructions.
12046 The INVERSE argument causes the bitwise inverse of the given operand to be
12047 recognized instead (used for recognizing legal immediates for the VAND/VORN
12048 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12049 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12050 output, rather than the real insns vbic/vorr).
12052 INVERSE makes no difference to the recognition of float vectors.
12054 The return value is the variant of immediate as shown in the above table, or
12055 -1 if the given value doesn't match any of the listed patterns.
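   For example, a V4SImode vector whose elements all equal 0x4700 matches
   variant 1 with an element width of 32: each lane is
   00000000 00000000 01000111 00000000, so *MODCONST is set to 0x4700 and
   1 is returned.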
12057 static int
12058 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12059 rtx *modconst, int *elementwidth)
12061 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12062 matches = 1; \
12063 for (i = 0; i < idx; i += (STRIDE)) \
12064 if (!(TEST)) \
12065 matches = 0; \
12066 if (matches) \
12068 immtype = (CLASS); \
12069 elsize = (ELSIZE); \
12070 break; \
12073 unsigned int i, elsize = 0, idx = 0, n_elts;
12074 unsigned int innersize;
12075 unsigned char bytes[16];
12076 int immtype = -1, matches;
12077 unsigned int invmask = inverse ? 0xff : 0;
12078 bool vector = GET_CODE (op) == CONST_VECTOR;
12080 if (vector)
12082 n_elts = CONST_VECTOR_NUNITS (op);
12083 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12085 else
12087 n_elts = 1;
12088 if (mode == VOIDmode)
12089 mode = DImode;
12090 innersize = GET_MODE_SIZE (mode);
12093 /* Vectors of float constants. */
12094 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12096 rtx el0 = CONST_VECTOR_ELT (op, 0);
12097 REAL_VALUE_TYPE r0;
12099 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12100 return -1;
12102 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12104 for (i = 1; i < n_elts; i++)
12106 rtx elt = CONST_VECTOR_ELT (op, i);
12107 REAL_VALUE_TYPE re;
12109 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12111 if (!REAL_VALUES_EQUAL (r0, re))
12112 return -1;
12115 if (modconst)
12116 *modconst = CONST_VECTOR_ELT (op, 0);
12118 if (elementwidth)
12119 *elementwidth = 0;
12121 if (el0 == CONST0_RTX (GET_MODE (el0)))
12122 return 19;
12123 else
12124 return 18;
12127 /* Splat vector constant out into a byte vector. */
12128 for (i = 0; i < n_elts; i++)
12130 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12131 unsigned HOST_WIDE_INT elpart;
12132 unsigned int part, parts;
12134 if (CONST_INT_P (el))
12136 elpart = INTVAL (el);
12137 parts = 1;
12139 else if (CONST_DOUBLE_P (el))
12141 elpart = CONST_DOUBLE_LOW (el);
12142 parts = 2;
12144 else
12145 gcc_unreachable ();
12147 for (part = 0; part < parts; part++)
12149 unsigned int byte;
12150 for (byte = 0; byte < innersize; byte++)
12152 bytes[idx++] = (elpart & 0xff) ^ invmask;
12153 elpart >>= BITS_PER_UNIT;
12155 if (CONST_DOUBLE_P (el))
12156 elpart = CONST_DOUBLE_HIGH (el);
12160 /* Sanity check. */
12161 gcc_assert (idx == GET_MODE_SIZE (mode));
12165 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12166 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12168 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12169 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12171 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12172 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12174 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12175 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12177 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12179 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12181 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12182 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12184 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12185 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12187 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12188 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12190 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12191 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12193 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12195 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12197 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12198 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12200 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12203 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12206 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12207 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12209 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12211 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12212 && bytes[i] == bytes[(i + 8) % idx]);
12214 while (0);
12216 if (immtype == -1)
12217 return -1;
12219 if (elementwidth)
12220 *elementwidth = elsize;
12222 if (modconst)
12224 unsigned HOST_WIDE_INT imm = 0;
12226 /* Un-invert bytes of recognized vector, if necessary. */
12227 if (invmask != 0)
12228 for (i = 0; i < idx; i++)
12229 bytes[i] ^= invmask;
12231 if (immtype == 17)
12233 /* FIXME: Broken on 32-bit H_W_I hosts. */
12234 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12236 for (i = 0; i < 8; i++)
12237 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12238 << (i * BITS_PER_UNIT);
12240 *modconst = GEN_INT (imm);
12242 else
12244 unsigned HOST_WIDE_INT imm = 0;
12246 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12247 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12249 *modconst = GEN_INT (imm);
12253 return immtype;
12254 #undef CHECK
12257 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12258 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12259 float elements), and a modified constant (whatever should be output for a
12260 VMOV) in *MODCONST. */
12263 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12264 rtx *modconst, int *elementwidth)
12266 rtx tmpconst;
12267 int tmpwidth;
12268 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12270 if (retval == -1)
12271 return 0;
12273 if (modconst)
12274 *modconst = tmpconst;
12276 if (elementwidth)
12277 *elementwidth = tmpwidth;
12279 return 1;
12282 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12283 the immediate is valid, write a constant suitable for using as an operand
12284 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12285 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12288 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12289 rtx *modconst, int *elementwidth)
12291 rtx tmpconst;
12292 int tmpwidth;
12293 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12295 if (retval < 0 || retval > 5)
12296 return 0;
12298 if (modconst)
12299 *modconst = tmpconst;
12301 if (elementwidth)
12302 *elementwidth = tmpwidth;
12304 return 1;
12307 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12308 the immediate is valid, write a constant suitable for using as an operand
12309 to VSHR/VSHL to *MODCONST and the corresponding element width to
12310 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left shift or a
12311 right shift, because the two have different limitations. */
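/* For example, for a V8HImode operand (16-bit elements) every lane of OP
   must hold the same shift count; a left-shift count must be in [0, 15],
   a right-shift count in [1, 16], and *ELEMENTWIDTH is set to 16.  */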
12314 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12315 rtx *modconst, int *elementwidth,
12316 bool isleftshift)
12318 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12319 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12320 unsigned HOST_WIDE_INT last_elt = 0;
12321 unsigned HOST_WIDE_INT maxshift;
12323 /* Extract the (common) shift count, checking that all elements of the
vector hold the same constant. */
12324 for (i = 0; i < n_elts; i++)
12326 rtx el = CONST_VECTOR_ELT (op, i);
12327 unsigned HOST_WIDE_INT elpart;
12329 if (CONST_INT_P (el))
12330 elpart = INTVAL (el);
12331 else if (CONST_DOUBLE_P (el))
12332 return 0;
12333 else
12334 gcc_unreachable ();
12336 if (i != 0 && elpart != last_elt)
12337 return 0;
12339 last_elt = elpart;
12342 /* Shift less than element size. */
12343 maxshift = innersize * 8;
12345 if (isleftshift)
12347 /* Left shift immediate value can be from 0 to <size>-1. */
12348 if (last_elt >= maxshift)
12349 return 0;
12351 else
12353 /* Right shift immediate value can be from 1 to <size>. */
12354 if (last_elt == 0 || last_elt > maxshift)
12355 return 0;
12358 if (elementwidth)
12359 *elementwidth = innersize * 8;
12361 if (modconst)
12362 *modconst = CONST_VECTOR_ELT (op, 0);
12364 return 1;
12367 /* Return a string suitable for output of Neon immediate logic operation
12368 MNEM. */
12370 char *
12371 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12372 int inverse, int quad)
12374 int width, is_valid;
12375 static char templ[40];
12377 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12379 gcc_assert (is_valid != 0);
12381 if (quad)
12382 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12383 else
12384 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12386 return templ;
12389 /* Return a string suitable for output of Neon immediate shift operation
12390 (VSHR or VSHL) MNEM. */
12392 char *
12393 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12394 enum machine_mode mode, int quad,
12395 bool isleftshift)
12397 int width, is_valid;
12398 static char templ[40];
12400 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12401 gcc_assert (is_valid != 0);
12403 if (quad)
12404 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12405 else
12406 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12408 return templ;
12411 /* Output a sequence of pairwise operations to implement a reduction.
12412 NOTE: We do "too much work" here, because pairwise operations work on two
12413 registers-worth of operands in one go. Unfortunately we can't exploit those
12414 extra calculations to do the full operation in fewer steps, I don't think.
12415 Although all vector elements of the result but the first are ignored, we
12416 actually calculate the same result in each of the elements. An alternative
12417 such as initially loading a vector with zero to use as each of the second
12418 operands would use up an additional register and take an extra instruction,
12419 for no particular gain. */
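/* For example, reducing a V4SFmode vector takes two pairwise steps (two
   calls to REDUC, e.g. a VPADD for a sum reduction): the first folds the
   four elements into two partial results, the second folds those, leaving
   the full reduction in element 0 of OP0.  */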
12421 void
12422 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12423 rtx (*reduc) (rtx, rtx, rtx))
12425 enum machine_mode inner = GET_MODE_INNER (mode);
12426 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12427 rtx tmpsum = op1;
12429 for (i = parts / 2; i >= 1; i /= 2)
12431 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12432 emit_insn (reduc (dest, tmpsum, tmpsum));
12433 tmpsum = dest;
12437 /* If VALS is a vector constant that can be loaded into a register
12438 using VDUP, generate instructions to do so and return an RTX to
12439 assign to the register. Otherwise return NULL_RTX. */
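/* For example, a V8QImode constant { 7, 7, 7, 7, 7, 7, 7, 7 } is handled
   by copying 7 into a core register and returning
   (vec_duplicate:V8QI (reg)), so a single VDUP of that register
   initializes the whole vector.  */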
12441 static rtx
12442 neon_vdup_constant (rtx vals)
12444 enum machine_mode mode = GET_MODE (vals);
12445 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12446 int n_elts = GET_MODE_NUNITS (mode);
12447 bool all_same = true;
12448 rtx x;
12449 int i;
12451 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12452 return NULL_RTX;
12454 for (i = 0; i < n_elts; ++i)
12456 x = XVECEXP (vals, 0, i);
12457 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12458 all_same = false;
12461 if (!all_same)
12462 /* The elements are not all the same. We could handle repeating
12463 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12464 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12465 vdup.i16). */
12466 return NULL_RTX;
12468 /* We can load this constant by using VDUP and a constant in a
12469 single ARM register. This will be cheaper than a vector
12470 load. */
12472 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12473 return gen_rtx_VEC_DUPLICATE (mode, x);
12476 /* Generate code to load VALS, which is a PARALLEL containing only
12477 constants (for vec_init) or CONST_VECTOR, efficiently into a
12478 register. Returns an RTX to copy into the register, or NULL_RTX
12479 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12482 neon_make_constant (rtx vals)
12484 enum machine_mode mode = GET_MODE (vals);
12485 rtx target;
12486 rtx const_vec = NULL_RTX;
12487 int n_elts = GET_MODE_NUNITS (mode);
12488 int n_const = 0;
12489 int i;
12491 if (GET_CODE (vals) == CONST_VECTOR)
12492 const_vec = vals;
12493 else if (GET_CODE (vals) == PARALLEL)
12495 /* A CONST_VECTOR must contain only CONST_INTs and
12496 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12497 Only store valid constants in a CONST_VECTOR. */
12498 for (i = 0; i < n_elts; ++i)
12500 rtx x = XVECEXP (vals, 0, i);
12501 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12502 n_const++;
12504 if (n_const == n_elts)
12505 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12507 else
12508 gcc_unreachable ();
12510 if (const_vec != NULL
12511 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12512 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12513 return const_vec;
12514 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12515 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12516 pipeline cycle; creating the constant takes one or two ARM
12517 pipeline cycles. */
12518 return target;
12519 else if (const_vec != NULL_RTX)
12520 /* Load from constant pool. On Cortex-A8 this takes two cycles
12521 (for either double or quad vectors). We can not take advantage
12522 of single-cycle VLD1 because we need a PC-relative addressing
12523 mode. */
12524 return const_vec;
12525 else
12526 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12527 We can not construct an initializer. */
12528 return NULL_RTX;
12531 /* Initialize vector TARGET to VALS. */
12533 void
12534 neon_expand_vector_init (rtx target, rtx vals)
12536 enum machine_mode mode = GET_MODE (target);
12537 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12538 int n_elts = GET_MODE_NUNITS (mode);
12539 int n_var = 0, one_var = -1;
12540 bool all_same = true;
12541 rtx x, mem;
12542 int i;
12544 for (i = 0; i < n_elts; ++i)
12546 x = XVECEXP (vals, 0, i);
12547 if (!CONSTANT_P (x))
12548 ++n_var, one_var = i;
12550 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12551 all_same = false;
12554 if (n_var == 0)
12556 rtx constant = neon_make_constant (vals);
12557 if (constant != NULL_RTX)
12559 emit_move_insn (target, constant);
12560 return;
12564 /* Splat a single non-constant element if we can. */
12565 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12567 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12568 emit_insn (gen_rtx_SET (VOIDmode, target,
12569 gen_rtx_VEC_DUPLICATE (mode, x)));
12570 return;
12573 /* One field is non-constant. Load constant then overwrite varying
12574 field. This is more efficient than using the stack. */
12575 if (n_var == 1)
12577 rtx copy = copy_rtx (vals);
12578 rtx index = GEN_INT (one_var);
12580 /* Load constant part of vector, substitute neighboring value for
12581 varying element. */
12582 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12583 neon_expand_vector_init (target, copy);
12585 /* Insert variable. */
12586 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12587 switch (mode)
12589 case V8QImode:
12590 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12591 break;
12592 case V16QImode:
12593 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12594 break;
12595 case V4HImode:
12596 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12597 break;
12598 case V8HImode:
12599 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12600 break;
12601 case V2SImode:
12602 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12603 break;
12604 case V4SImode:
12605 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12606 break;
12607 case V2SFmode:
12608 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12609 break;
12610 case V4SFmode:
12611 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12612 break;
12613 case V2DImode:
12614 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12615 break;
12616 default:
12617 gcc_unreachable ();
12619 return;
12622 /* Construct the vector in memory one field at a time
12623 and load the whole vector. */
12624 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12625 for (i = 0; i < n_elts; i++)
12626 emit_move_insn (adjust_address_nv (mem, inner_mode,
12627 i * GET_MODE_SIZE (inner_mode)),
12628 XVECEXP (vals, 0, i));
12629 emit_move_insn (target, mem);
12632 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12633 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12634 reported source locations are bogus. */
12636 static void
12637 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12638 const char *err)
12640 HOST_WIDE_INT lane;
12642 gcc_assert (CONST_INT_P (operand));
12644 lane = INTVAL (operand);
12646 if (lane < low || lane >= high)
12647 error (err);
12650 /* Bounds-check lanes. */
12652 void
12653 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12655 bounds_check (operand, low, high, "lane out of range");
12658 /* Bounds-check constants. */
12660 void
12661 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12663 bounds_check (operand, low, high, "constant out of range");
12666 HOST_WIDE_INT
12667 neon_element_bits (enum machine_mode mode)
12669 if (mode == DImode)
12670 return GET_MODE_BITSIZE (mode);
12671 else
12672 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12676 /* Predicates for `match_operand' and `match_operator'. */
12678 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12679 WB is true if full writeback address modes are allowed and is false
12680 if limited writeback address modes (POST_INC and PRE_DEC) are
12681 allowed. */
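/* For example, (mem (reg)) and (mem (plus (reg) (const_int N))) are
   accepted when N is a multiple of 4 in the range -1020 to 1020, which is
   the offset range of the coprocessor load/store instructions.  */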
12684 arm_coproc_mem_operand (rtx op, bool wb)
12686 rtx ind;
12688 /* Reject eliminable registers. */
12689 if (! (reload_in_progress || reload_completed || lra_in_progress)
12690 && ( reg_mentioned_p (frame_pointer_rtx, op)
12691 || reg_mentioned_p (arg_pointer_rtx, op)
12692 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12693 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12694 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12695 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12696 return FALSE;
12698 /* Constants are converted into offsets from labels. */
12699 if (!MEM_P (op))
12700 return FALSE;
12702 ind = XEXP (op, 0);
12704 if (reload_completed
12705 && (GET_CODE (ind) == LABEL_REF
12706 || (GET_CODE (ind) == CONST
12707 && GET_CODE (XEXP (ind, 0)) == PLUS
12708 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12709 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12710 return TRUE;
12712 /* Match: (mem (reg)). */
12713 if (REG_P (ind))
12714 return arm_address_register_rtx_p (ind, 0);
12716 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12717 acceptable in any case (subject to verification by
12718 arm_address_register_rtx_p). We need WB to be true to accept
12719 PRE_INC and POST_DEC. */
12720 if (GET_CODE (ind) == POST_INC
12721 || GET_CODE (ind) == PRE_DEC
12722 || (wb
12723 && (GET_CODE (ind) == PRE_INC
12724 || GET_CODE (ind) == POST_DEC)))
12725 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12727 if (wb
12728 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12729 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12730 && GET_CODE (XEXP (ind, 1)) == PLUS
12731 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12732 ind = XEXP (ind, 1);
12734 /* Match:
12735 (plus (reg)
12736 (const)). */
12737 if (GET_CODE (ind) == PLUS
12738 && REG_P (XEXP (ind, 0))
12739 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12740 && CONST_INT_P (XEXP (ind, 1))
12741 && INTVAL (XEXP (ind, 1)) > -1024
12742 && INTVAL (XEXP (ind, 1)) < 1024
12743 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12744 return TRUE;
12746 return FALSE;
12749 /* Return TRUE if OP is a memory operand which we can load or store a vector
12750 to/from. TYPE is one of the following values:
12751 0 - Vector load/store (vldr)
12752 1 - Core registers (ldm)
12753 2 - Element/structure loads (vld1)
12756 neon_vector_mem_operand (rtx op, int type, bool strict)
12758 rtx ind;
12760 /* Reject eliminable registers. */
12761 if (! (reload_in_progress || reload_completed)
12762 && ( reg_mentioned_p (frame_pointer_rtx, op)
12763 || reg_mentioned_p (arg_pointer_rtx, op)
12764 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12765 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12766 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12767 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12768 return !strict;
12770 /* Constants are converted into offsets from labels. */
12771 if (!MEM_P (op))
12772 return FALSE;
12774 ind = XEXP (op, 0);
12776 if (reload_completed
12777 && (GET_CODE (ind) == LABEL_REF
12778 || (GET_CODE (ind) == CONST
12779 && GET_CODE (XEXP (ind, 0)) == PLUS
12780 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12781 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12782 return TRUE;
12784 /* Match: (mem (reg)). */
12785 if (REG_P (ind))
12786 return arm_address_register_rtx_p (ind, 0);
12788 /* Allow post-increment with Neon registers. */
12789 if ((type != 1 && GET_CODE (ind) == POST_INC)
12790 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12791 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12793 /* FIXME: vld1 allows register post-modify. */
12795 /* Match:
12796 (plus (reg)
12797 (const)). */
12798 if (type == 0
12799 && GET_CODE (ind) == PLUS
12800 && REG_P (XEXP (ind, 0))
12801 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12802 && CONST_INT_P (XEXP (ind, 1))
12803 && INTVAL (XEXP (ind, 1)) > -1024
12804 /* For quad modes, we restrict the constant offset to be slightly less
12805 than what the instruction format permits. We have no such constraint
12806 on double mode offsets. (This must match arm_legitimate_index_p.) */
12807 && (INTVAL (XEXP (ind, 1))
12808 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12809 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12810 return TRUE;
12812 return FALSE;
12815 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12816 type. */
12818 neon_struct_mem_operand (rtx op)
12820 rtx ind;
12822 /* Reject eliminable registers. */
12823 if (! (reload_in_progress || reload_completed)
12824 && ( reg_mentioned_p (frame_pointer_rtx, op)
12825 || reg_mentioned_p (arg_pointer_rtx, op)
12826 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12827 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12828 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12829 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12830 return FALSE;
12832 /* Constants are converted into offsets from labels. */
12833 if (!MEM_P (op))
12834 return FALSE;
12836 ind = XEXP (op, 0);
12838 if (reload_completed
12839 && (GET_CODE (ind) == LABEL_REF
12840 || (GET_CODE (ind) == CONST
12841 && GET_CODE (XEXP (ind, 0)) == PLUS
12842 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12843 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12844 return TRUE;
12846 /* Match: (mem (reg)). */
12847 if (REG_P (ind))
12848 return arm_address_register_rtx_p (ind, 0);
12850 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12851 if (GET_CODE (ind) == POST_INC
12852 || GET_CODE (ind) == PRE_DEC)
12853 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12855 return FALSE;
12858 /* Return true if X is a register that will be eliminated later on. */
12860 arm_eliminable_register (rtx x)
12862 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12863 || REGNO (x) == ARG_POINTER_REGNUM
12864 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12865 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12868 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12869 coprocessor registers. Otherwise return NO_REGS. */
12871 enum reg_class
12872 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12874 if (mode == HFmode)
12876 if (!TARGET_NEON_FP16)
12877 return GENERAL_REGS;
12878 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12879 return NO_REGS;
12880 return GENERAL_REGS;
12883 /* The neon move patterns handle all legitimate vector and struct
12884 addresses. */
12885 if (TARGET_NEON
12886 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12887 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12888 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12889 || VALID_NEON_STRUCT_MODE (mode)))
12890 return NO_REGS;
12892 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12893 return NO_REGS;
12895 return GENERAL_REGS;
12898 /* Values which must be returned in the most-significant end of the return
12899 register. */
12901 static bool
12902 arm_return_in_msb (const_tree valtype)
12904 return (TARGET_AAPCS_BASED
12905 && BYTES_BIG_ENDIAN
12906 && (AGGREGATE_TYPE_P (valtype)
12907 || TREE_CODE (valtype) == COMPLEX_TYPE
12908 || FIXED_POINT_TYPE_P (valtype)));
12911 /* Return TRUE if X references a SYMBOL_REF. */
12913 symbol_mentioned_p (rtx x)
12915 const char * fmt;
12916 int i;
12918 if (GET_CODE (x) == SYMBOL_REF)
12919 return 1;
12921 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12922 are constant offsets, not symbols. */
12923 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12924 return 0;
12926 fmt = GET_RTX_FORMAT (GET_CODE (x));
12928 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12930 if (fmt[i] == 'E')
12932 int j;
12934 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12935 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12936 return 1;
12938 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12939 return 1;
12942 return 0;
12945 /* Return TRUE if X references a LABEL_REF. */
12947 label_mentioned_p (rtx x)
12949 const char * fmt;
12950 int i;
12952 if (GET_CODE (x) == LABEL_REF)
12953 return 1;
12955 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12956 instruction, but they are constant offsets, not symbols. */
12957 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12958 return 0;
12960 fmt = GET_RTX_FORMAT (GET_CODE (x));
12961 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12963 if (fmt[i] == 'E')
12965 int j;
12967 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12968 if (label_mentioned_p (XVECEXP (x, i, j)))
12969 return 1;
12971 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12972 return 1;
12975 return 0;
12979 tls_mentioned_p (rtx x)
12981 switch (GET_CODE (x))
12983 case CONST:
12984 return tls_mentioned_p (XEXP (x, 0));
12986 case UNSPEC:
12987 if (XINT (x, 1) == UNSPEC_TLS)
12988 return 1;
12990 default:
12991 return 0;
12995 /* Must not copy any rtx that uses a pc-relative address. */
12997 static int
12998 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13000 if (GET_CODE (*x) == UNSPEC
13001 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13002 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13003 return 1;
13004 return 0;
13007 static bool
13008 arm_cannot_copy_insn_p (rtx insn)
13010 /* The tls call insn cannot be copied, as it is paired with a data
13011 word. */
13012 if (recog_memoized (insn) == CODE_FOR_tlscall)
13013 return true;
13015 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13018 enum rtx_code
13019 minmax_code (rtx x)
13021 enum rtx_code code = GET_CODE (x);
13023 switch (code)
13025 case SMAX:
13026 return GE;
13027 case SMIN:
13028 return LE;
13029 case UMIN:
13030 return LEU;
13031 case UMAX:
13032 return GEU;
13033 default:
13034 gcc_unreachable ();
13038 /* Match pair of min/max operators that can be implemented via usat/ssat. */
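/* For example, bounds 0 and 255 yield *MASK == 8 and *SIGNED_SAT == false
   (a USAT #8 range), while bounds -128 and 127 yield *MASK == 8 and
   *SIGNED_SAT == true (an SSAT #8 range).  */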
13040 bool
13041 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13042 int *mask, bool *signed_sat)
13044 /* The high bound must be a power of two minus one. */
13045 int log = exact_log2 (INTVAL (hi_bound) + 1);
13046 if (log == -1)
13047 return false;
13049 /* The low bound is either zero (for usat) or one less than the
13050 negation of the high bound (for ssat). */
13051 if (INTVAL (lo_bound) == 0)
13053 if (mask)
13054 *mask = log;
13055 if (signed_sat)
13056 *signed_sat = false;
13058 return true;
13061 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13063 if (mask)
13064 *mask = log + 1;
13065 if (signed_sat)
13066 *signed_sat = true;
13068 return true;
13071 return false;
13074 /* Return 1 if memory locations are adjacent. */
13076 adjacent_mem_locations (rtx a, rtx b)
13078 /* We don't guarantee to preserve the order of these memory refs. */
13079 if (volatile_refs_p (a) || volatile_refs_p (b))
13080 return 0;
13082 if ((REG_P (XEXP (a, 0))
13083 || (GET_CODE (XEXP (a, 0)) == PLUS
13084 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13085 && (REG_P (XEXP (b, 0))
13086 || (GET_CODE (XEXP (b, 0)) == PLUS
13087 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13089 HOST_WIDE_INT val0 = 0, val1 = 0;
13090 rtx reg0, reg1;
13091 int val_diff;
13093 if (GET_CODE (XEXP (a, 0)) == PLUS)
13095 reg0 = XEXP (XEXP (a, 0), 0);
13096 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13098 else
13099 reg0 = XEXP (a, 0);
13101 if (GET_CODE (XEXP (b, 0)) == PLUS)
13103 reg1 = XEXP (XEXP (b, 0), 0);
13104 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13106 else
13107 reg1 = XEXP (b, 0);
13109 /* Don't accept any offset that will require multiple
13110 instructions to handle, since this would cause the
13111 arith_adjacentmem pattern to output an overlong sequence. */
13112 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13113 return 0;
13115 /* Don't allow an eliminable register: register elimination can make
13116 the offset too large. */
13117 if (arm_eliminable_register (reg0))
13118 return 0;
13120 val_diff = val1 - val0;
13122 if (arm_ld_sched)
13124 /* If the target has load delay slots, then there's no benefit
13125 to using an ldm instruction unless the offset is zero and
13126 we are optimizing for size. */
13127 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13128 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13129 && (val_diff == 4 || val_diff == -4));
13132 return ((REGNO (reg0) == REGNO (reg1))
13133 && (val_diff == 4 || val_diff == -4));
13136 return 0;
13139 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13140 for load operations, false for store operations. CONSECUTIVE is true
13141 if the register numbers in the operation must be consecutive in the register
13142 bank. RETURN_PC is true if the value is to be loaded into the PC.
13143 The pattern we are trying to match for load is:
13144 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13145 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13148 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13150 where
13151 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13152 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13153 3. If consecutive is TRUE, then for kth register being loaded,
13154 REGNO (R_dk) = REGNO (R_d0) + k.
13155 The pattern for store is similar. */
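/* For example, with MODE == SImode a three-register load could be the
   PARALLEL
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))
      (set (reg:SI 6) (mem:SI (plus:SI (reg:SI 0) (const_int 8))))]
   which satisfies conditions 1-3 above with <reg_increment> == 4.  */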
13156 bool
13157 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13158 bool consecutive, bool return_pc)
13160 HOST_WIDE_INT count = XVECLEN (op, 0);
13161 rtx reg, mem, addr;
13162 unsigned regno;
13163 unsigned first_regno;
13164 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13165 rtx elt;
13166 bool addr_reg_in_reglist = false;
13167 bool update = false;
13168 int reg_increment;
13169 int offset_adj;
13170 int regs_per_val;
13172 /* If not in SImode, then registers must be consecutive
13173 (e.g., VLDM instructions for DFmode). */
13174 gcc_assert ((mode == SImode) || consecutive);
13175 /* Setting return_pc for stores is illegal. */
13176 gcc_assert (!return_pc || load);
13178 /* Set up the increments and the regs per val based on the mode. */
13179 reg_increment = GET_MODE_SIZE (mode);
13180 regs_per_val = reg_increment / 4;
13181 offset_adj = return_pc ? 1 : 0;
13183 if (count <= 1
13184 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13185 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13186 return false;
13188 /* Check if this is a write-back. */
13189 elt = XVECEXP (op, 0, offset_adj);
13190 if (GET_CODE (SET_SRC (elt)) == PLUS)
13192 i++;
13193 base = 1;
13194 update = true;
13196 /* The offset adjustment must be the number of registers being
13197 popped times the size of a single register. */
13198 if (!REG_P (SET_DEST (elt))
13199 || !REG_P (XEXP (SET_SRC (elt), 0))
13200 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13201 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13202 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13203 ((count - 1 - offset_adj) * reg_increment))
13204 return false;
13207 i = i + offset_adj;
13208 base = base + offset_adj;
13209 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13210 success depends on the type: VLDM can do just one reg,
13211 LDM must do at least two. */
13212 if ((count <= i) && (mode == SImode))
13213 return false;
13215 elt = XVECEXP (op, 0, i - 1);
13216 if (GET_CODE (elt) != SET)
13217 return false;
13219 if (load)
13221 reg = SET_DEST (elt);
13222 mem = SET_SRC (elt);
13224 else
13226 reg = SET_SRC (elt);
13227 mem = SET_DEST (elt);
13230 if (!REG_P (reg) || !MEM_P (mem))
13231 return false;
13233 regno = REGNO (reg);
13234 first_regno = regno;
13235 addr = XEXP (mem, 0);
13236 if (GET_CODE (addr) == PLUS)
13238 if (!CONST_INT_P (XEXP (addr, 1)))
13239 return false;
13241 offset = INTVAL (XEXP (addr, 1));
13242 addr = XEXP (addr, 0);
13245 if (!REG_P (addr))
13246 return false;
13248 /* Don't allow SP to be loaded unless it is also the base register. It
13249 guarantees that SP is reset correctly when an LDM instruction
13250 is interrupted. Otherwise, we might end up with a corrupt stack. */
13251 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13252 return false;
13254 for (; i < count; i++)
13256 elt = XVECEXP (op, 0, i);
13257 if (GET_CODE (elt) != SET)
13258 return false;
13260 if (load)
13262 reg = SET_DEST (elt);
13263 mem = SET_SRC (elt);
13265 else
13267 reg = SET_SRC (elt);
13268 mem = SET_DEST (elt);
13271 if (!REG_P (reg)
13272 || GET_MODE (reg) != mode
13273 || REGNO (reg) <= regno
13274 || (consecutive
13275 && (REGNO (reg) !=
13276 (unsigned int) (first_regno + regs_per_val * (i - base))))
13277 /* Don't allow SP to be loaded unless it is also the base register. It
13278 guarantees that SP is reset correctly when an LDM instruction
13279 is interrupted. Otherwise, we might end up with a corrupt stack. */
13280 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13281 || !MEM_P (mem)
13282 || GET_MODE (mem) != mode
13283 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13284 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13285 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13286 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13287 offset + (i - base) * reg_increment))
13288 && (!REG_P (XEXP (mem, 0))
13289 || offset + (i - base) * reg_increment != 0)))
13290 return false;
13292 regno = REGNO (reg);
13293 if (regno == REGNO (addr))
13294 addr_reg_in_reglist = true;
13297 if (load)
13299 if (update && addr_reg_in_reglist)
13300 return false;
13302 /* For Thumb-1, the address register is always modified, either by write-back
13303 or by explicit load. If the pattern does not describe an update,
13304 then the address register must be in the list of loaded registers. */
13305 if (TARGET_THUMB1)
13306 return update || addr_reg_in_reglist;
13309 return true;
13312 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13313 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13314 instruction. ADD_OFFSET is nonzero if the base address register needs
13315 to be modified with an add instruction before we can use it. */
13317 static bool
13318 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13319 int nops, HOST_WIDE_INT add_offset)
13321 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13322 if the offset isn't small enough. The reason 2 ldrs are faster
13323 is because these ARMs are able to do more than one cache access
13324 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13325 whilst the ARM8 has a double bandwidth cache. This means that
13326 these cores can do both an instruction fetch and a data fetch in
13327 a single cycle, so the trick of calculating the address into a
13328 scratch register (one of the result regs) and then doing a load
13329 multiple actually becomes slower (and no smaller in code size).
13330 That is the transformation
13332 ldr rd1, [rbase + offset]
13333 ldr rd2, [rbase + offset + 4]
   to
13337 add rd1, rbase, offset
13338 ldmia rd1, {rd1, rd2}
13340 produces worse code -- '3 cycles + any stalls on rd2' instead of
13341 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13342 access per cycle, the first sequence could never complete in less
13343 than 6 cycles, whereas the ldm sequence would only take 5 and
13344 would make better use of sequential accesses if not hitting the
13345 cache.
13347 We cheat here and test 'arm_ld_sched' which we currently know to
13348 only be true for the ARM8, ARM9 and StrongARM. If this ever
13349 changes, then the test below needs to be reworked. */
13350 if (nops == 2 && arm_ld_sched && add_offset != 0)
13351 return false;
13353 /* XScale has load-store double instructions, but they have stricter
13354 alignment requirements than load-store multiple, so we cannot
13355 use them.
13357 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13358 the pipeline until completion.
13360 NREGS CYCLES
     1     3
     2     4
     3     5
     4     6
13366 An ldr instruction takes 1-3 cycles, but does not block the
13367 pipeline.
13369 NREGS CYCLES
13370 1 1-3
13371 2 2-6
13372 3 3-9
13373 4 4-12
13375 Best case ldr will always win. However, the more ldr instructions
13376 we issue, the less likely we are to be able to schedule them well.
13377 Using ldr instructions also increases code size.
13379 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13380 for counts of 3 or 4 regs. */
13381 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13382 return false;
13383 return true;
13386 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13387 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13388 an array ORDER which describes the sequence to use when accessing the
13389 offsets that produces an ascending order. In this sequence, each
13390 offset must be larger by exactly 4 than the previous one. ORDER[0]
13391 must have been filled in with the lowest offset by the caller.
13392 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13393 we use to verify that ORDER produces an ascending order of registers.
13394 Return true if it was possible to construct such an order, false if
13395 not. */
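/* For example, with NOPS == 4 and UNSORTED_OFFSETS == {8, 4, 12, 0} the
   caller sets ORDER[0] = 3 (the lowest offset); this function then fills
   ORDER with {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12 in ascending order.  */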
13397 static bool
13398 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13399 int *unsorted_regs)
13401 int i;
13402 for (i = 1; i < nops; i++)
13404 int j;
13406 order[i] = order[i - 1];
13407 for (j = 0; j < nops; j++)
13408 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13410 /* We must find exactly one offset that is higher than the
13411 previous one by 4. */
13412 if (order[i] != order[i - 1])
13413 return false;
13414 order[i] = j;
13416 if (order[i] == order[i - 1])
13417 return false;
13418 /* The register numbers must be ascending. */
13419 if (unsorted_regs != NULL
13420 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13421 return false;
13423 return true;
13426 /* Used to determine in a peephole whether a sequence of load
13427 instructions can be changed into a load-multiple instruction.
13428 NOPS is the number of separate load instructions we are examining. The
13429 first NOPS entries in OPERANDS are the destination registers, the
13430 next NOPS entries are memory operands. If this function is
13431 successful, *BASE is set to the common base register of the memory
13432 accesses; *LOAD_OFFSET is set to the first memory location's offset
13433 from that base register.
13434 REGS is an array filled in with the destination register numbers.
13435 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13436 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13437 the sequence of registers in REGS matches the loads from ascending memory
13438 locations, and the function verifies that the register numbers are
13439 themselves ascending. If CHECK_REGS is false, the register numbers
13440 are stored in the order they are found in the operands. */
13441 static int
13442 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13443 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13445 int unsorted_regs[MAX_LDM_STM_OPS];
13446 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13447 int order[MAX_LDM_STM_OPS];
13448 rtx base_reg_rtx = NULL;
13449 int base_reg = -1;
13450 int i, ldm_case;
13452 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13453 easily extended if required. */
13454 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13456 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13458 /* Loop over the operands and check that the memory references are
13459 suitable (i.e. immediate offsets from the same base register). At
13460 the same time, extract the target register, and the memory
13461 offsets. */
13462 for (i = 0; i < nops; i++)
13464 rtx reg;
13465 rtx offset;
13467 /* Convert a subreg of a mem into the mem itself. */
13468 if (GET_CODE (operands[nops + i]) == SUBREG)
13469 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13471 gcc_assert (MEM_P (operands[nops + i]));
13473 /* Don't reorder volatile memory references; it doesn't seem worth
13474 looking for the case where the order is ok anyway. */
13475 if (MEM_VOLATILE_P (operands[nops + i]))
13476 return 0;
13478 offset = const0_rtx;
13480 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13481 || (GET_CODE (reg) == SUBREG
13482 && REG_P (reg = SUBREG_REG (reg))))
13483 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13484 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13485 || (GET_CODE (reg) == SUBREG
13486 && REG_P (reg = SUBREG_REG (reg))))
13487 && (CONST_INT_P (offset
13488 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13490 if (i == 0)
13492 base_reg = REGNO (reg);
13493 base_reg_rtx = reg;
13494 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13495 return 0;
13497 else if (base_reg != (int) REGNO (reg))
13498 /* Not addressed from the same base register. */
13499 return 0;
13501 unsorted_regs[i] = (REG_P (operands[i])
13502 ? REGNO (operands[i])
13503 : REGNO (SUBREG_REG (operands[i])));
13505 /* If it isn't an integer register, or if it overwrites the
13506 base register but isn't the last insn in the list, then
13507 we can't do this. */
13508 if (unsorted_regs[i] < 0
13509 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13510 || unsorted_regs[i] > 14
13511 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13512 return 0;
13514 /* Don't allow SP to be loaded unless it is also the base
13515 register. It guarantees that SP is reset correctly when
13516 an LDM instruction is interrupted. Otherwise, we might
13517 end up with a corrupt stack. */
13518 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13519 return 0;
13521 unsorted_offsets[i] = INTVAL (offset);
13522 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13523 order[0] = i;
13525 else
13526 /* Not a suitable memory address. */
13527 return 0;
13530 /* All the useful information has now been extracted from the
13531 operands into unsorted_regs and unsorted_offsets; additionally,
13532 order[0] has been set to the lowest offset in the list. Sort
13533 the offsets into order, verifying that they are adjacent, and
13534 check that the register numbers are ascending. */
13535 if (!compute_offset_order (nops, unsorted_offsets, order,
13536 check_regs ? unsorted_regs : NULL))
13537 return 0;
13539 if (saved_order)
13540 memcpy (saved_order, order, sizeof order);
13542 if (base)
13544 *base = base_reg;
13546 for (i = 0; i < nops; i++)
13547 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13549 *load_offset = unsorted_offsets[order[0]];
13552 if (TARGET_THUMB1
13553 && !peep2_reg_dead_p (nops, base_reg_rtx))
13554 return 0;
13556 if (unsorted_offsets[order[0]] == 0)
13557 ldm_case = 1; /* ldmia */
13558 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13559 ldm_case = 2; /* ldmib */
13560 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13561 ldm_case = 3; /* ldmda */
13562 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13563 ldm_case = 4; /* ldmdb */
13564 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13565 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13566 ldm_case = 5;
13567 else
13568 return 0;
13570 if (!multiple_operation_profitable_p (false, nops,
13571 ldm_case == 5
13572 ? unsorted_offsets[order[0]] : 0))
13573 return 0;
13575 return ldm_case;
13578 /* Used to determine in a peephole whether a sequence of store instructions can
13579 be changed into a store-multiple instruction.
13580 NOPS is the number of separate store instructions we are examining.
13581 NOPS_TOTAL is the total number of instructions recognized by the peephole
13582 pattern.
13583 The first NOPS entries in OPERANDS are the source registers, the next
13584 NOPS entries are memory operands. If this function is successful, *BASE is
13585 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13586 to the first memory location's offset from that base register. REGS is an
13587 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13588 likewise filled with the corresponding rtx's.
13589 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13590 numbers to an ascending order of stores.
13591 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13592 from ascending memory locations, and the function verifies that the register
13593 numbers are themselves ascending. If CHECK_REGS is false, the register
13594 numbers are stored in the order they are found in the operands. */
13595 static int
13596 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13597 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13598 HOST_WIDE_INT *load_offset, bool check_regs)
13600 int unsorted_regs[MAX_LDM_STM_OPS];
13601 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13602 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13603 int order[MAX_LDM_STM_OPS];
13604 int base_reg = -1;
13605 rtx base_reg_rtx = NULL;
13606 int i, stm_case;
13608 /* Write back of base register is currently only supported for Thumb 1. */
13609 int base_writeback = TARGET_THUMB1;
13611 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13612 easily extended if required. */
13613 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13615 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13617 /* Loop over the operands and check that the memory references are
13618 suitable (i.e. immediate offsets from the same base register). At
13619 the same time, extract the target register, and the memory
13620 offsets. */
13621 for (i = 0; i < nops; i++)
13623 rtx reg;
13624 rtx offset;
13626 /* Convert a subreg of a mem into the mem itself. */
13627 if (GET_CODE (operands[nops + i]) == SUBREG)
13628 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13630 gcc_assert (MEM_P (operands[nops + i]));
13632 /* Don't reorder volatile memory references; it doesn't seem worth
13633 looking for the case where the order is ok anyway. */
13634 if (MEM_VOLATILE_P (operands[nops + i]))
13635 return 0;
13637 offset = const0_rtx;
13639 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13640 || (GET_CODE (reg) == SUBREG
13641 && REG_P (reg = SUBREG_REG (reg))))
13642 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13643 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13644 || (GET_CODE (reg) == SUBREG
13645 && REG_P (reg = SUBREG_REG (reg))))
13646 && (CONST_INT_P (offset
13647 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13649 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13650 ? operands[i] : SUBREG_REG (operands[i]));
13651 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13653 if (i == 0)
13655 base_reg = REGNO (reg);
13656 base_reg_rtx = reg;
13657 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13658 return 0;
13660 else if (base_reg != (int) REGNO (reg))
13661 /* Not addressed from the same base register. */
13662 return 0;
13664 /* If it isn't an integer register, then we can't do this. */
13665 if (unsorted_regs[i] < 0
13666 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13667 /* The effects are unpredictable if the base register is
13668 both updated and stored. */
13669 || (base_writeback && unsorted_regs[i] == base_reg)
13670 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13671 || unsorted_regs[i] > 14)
13672 return 0;
13674 unsorted_offsets[i] = INTVAL (offset);
13675 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13676 order[0] = i;
13678 else
13679 /* Not a suitable memory address. */
13680 return 0;
13683 /* All the useful information has now been extracted from the
13684 operands into unsorted_regs and unsorted_offsets; additionally,
13685 order[0] has been set to the lowest offset in the list. Sort
13686 the offsets into order, verifying that they are adjacent, and
13687 check that the register numbers are ascending. */
13688 if (!compute_offset_order (nops, unsorted_offsets, order,
13689 check_regs ? unsorted_regs : NULL))
13690 return 0;
13692 if (saved_order)
13693 memcpy (saved_order, order, sizeof order);
13695 if (base)
13697 *base = base_reg;
13699 for (i = 0; i < nops; i++)
13701 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13702 if (reg_rtxs)
13703 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13706 *load_offset = unsorted_offsets[order[0]];
13709 if (TARGET_THUMB1
13710 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13711 return 0;
13713 if (unsorted_offsets[order[0]] == 0)
13714 stm_case = 1; /* stmia */
13715 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13716 stm_case = 2; /* stmib */
13717 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13718 stm_case = 3; /* stmda */
13719 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13720 stm_case = 4; /* stmdb */
13721 else
13722 return 0;
13724 if (!multiple_operation_profitable_p (false, nops, 0))
13725 return 0;
13727 return stm_case;
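/* Illustrative sketch, standalone: what SAVED_ORDER holds after a successful
   match.  order[k] is the index of the insn whose access lands at the k-th
   lowest address; the index selection below is a simplified stand-in for
   compute_offset_order (defined elsewhere in this file), which additionally
   checks that the offsets are adjacent words.  */
#include <stdio.h>

int
main (void)
{
  long offsets[4] = { 8, 0, 12, 4 };   /* per-insn offsets from the base  */
  int order[4];
  int nops = 4, used = 0;

  for (int k = 0; k < nops; k++)       /* pick the smallest unused offset  */
    {
      int best = -1;
      for (int i = 0; i < nops; i++)
        if (!((used >> i) & 1) && (best < 0 || offsets[i] < offsets[best]))
          best = i;
      order[k] = best;
      used |= 1 << best;
    }

  for (int k = 0; k < nops; k++)
    printf ("slot %d <- insn %d (offset %ld)\n", k, order[k], offsets[order[k]]);
  return 0;
}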
13730 /* Routines for use in generating RTL. */
13732 /* Generate a load-multiple instruction. COUNT is the number of loads in
13733 the instruction; REGS and MEMS are arrays containing the operands.
13734 BASEREG is the base register to be used in addressing the memory operands.
13735 WBACK_OFFSET is nonzero if the instruction should update the base
13736 register. */
13738 static rtx
13739 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13740 HOST_WIDE_INT wback_offset)
13742 int i = 0, j;
13743 rtx result;
13745 if (!multiple_operation_profitable_p (false, count, 0))
13747 rtx seq;
13749 start_sequence ();
13751 for (i = 0; i < count; i++)
13752 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13754 if (wback_offset != 0)
13755 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13757 seq = get_insns ();
13758 end_sequence ();
13760 return seq;
13763 result = gen_rtx_PARALLEL (VOIDmode,
13764 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13765 if (wback_offset != 0)
13767 XVECEXP (result, 0, 0)
13768 = gen_rtx_SET (VOIDmode, basereg,
13769 plus_constant (Pmode, basereg, wback_offset));
13770 i = 1;
13771 count++;
13774 for (j = 0; i < count; i++, j++)
13775 XVECEXP (result, 0, i)
13776 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13778 return result;
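/* Illustrative sketch, standalone: the shape of the PARALLEL built above for
   a write-back load-multiple.  Element 0 is the base-register update and the
   remaining elements are one register load each; print_ldm_parallel is a
   local name for this sketch only.  For count == 3, regs == {4,5,6} and
   wback_offset == 12 it prints the conceptual pattern
   (set base (plus base 12)) (set r4 mem[0]) (set r5 mem[1]) (set r6 mem[2]).  */
#include <stdio.h>

static void
print_ldm_parallel (int count, const int *regs, long wback_offset)
{
  int i = 0;
  if (wback_offset != 0)
    {
      printf ("(set base (plus base %ld))\n", wback_offset);
      i = 1;
      count++;
    }
  for (int j = 0; i < count; i++, j++)
    printf ("(set r%d mem[%d])\n", regs[j], j);
}

int
main (void)
{
  int regs[3] = { 4, 5, 6 };
  print_ldm_parallel (3, regs, 12);
  return 0;
}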
13781 /* Generate a store-multiple instruction. COUNT is the number of stores in
13782 the instruction; REGS and MEMS are arrays containing the operands.
13783 BASEREG is the base register to be used in addressing the memory operands.
13784 WBACK_OFFSET is nonzero if the instruction should update the base
13785 register. */
13787 static rtx
13788 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13789 HOST_WIDE_INT wback_offset)
13791 int i = 0, j;
13792 rtx result;
13794 if (GET_CODE (basereg) == PLUS)
13795 basereg = XEXP (basereg, 0);
13797 if (!multiple_operation_profitable_p (false, count, 0))
13799 rtx seq;
13801 start_sequence ();
13803 for (i = 0; i < count; i++)
13804 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13806 if (wback_offset != 0)
13807 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13809 seq = get_insns ();
13810 end_sequence ();
13812 return seq;
13815 result = gen_rtx_PARALLEL (VOIDmode,
13816 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13817 if (wback_offset != 0)
13819 XVECEXP (result, 0, 0)
13820 = gen_rtx_SET (VOIDmode, basereg,
13821 plus_constant (Pmode, basereg, wback_offset));
13822 i = 1;
13823 count++;
13826 for (j = 0; i < count; i++, j++)
13827 XVECEXP (result, 0, i)
13828 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13830 return result;
13833 /* Generate either a load-multiple or a store-multiple instruction. This
13834 function can be used in situations where we can start with a single MEM
13835 rtx and adjust its address upwards.
13836 COUNT is the number of operations in the instruction, not counting a
13837 possible update of the base register. REGS is an array containing the
13838 register operands.
13839 BASEREG is the base register to be used in addressing the memory operands,
13840 which are constructed from BASEMEM.
13841 WRITE_BACK specifies whether the generated instruction should include an
13842 update of the base register.
13843 OFFSETP is used to pass an offset to and from this function; this offset
13844 is not used when constructing the address (instead BASEMEM should have an
13845 appropriate offset in its address), it is used only for setting
13846 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13848 static rtx
13849 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13850 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13852 rtx mems[MAX_LDM_STM_OPS];
13853 HOST_WIDE_INT offset = *offsetp;
13854 int i;
13856 gcc_assert (count <= MAX_LDM_STM_OPS);
13858 if (GET_CODE (basereg) == PLUS)
13859 basereg = XEXP (basereg, 0);
13861 for (i = 0; i < count; i++)
13863 rtx addr = plus_constant (Pmode, basereg, i * 4);
13864 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13865 offset += 4;
13868 if (write_back)
13869 *offsetp = offset;
13871 if (is_load)
13872 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13873 write_back ? 4 * count : 0);
13874 else
13875 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13876 write_back ? 4 * count : 0);
13879 rtx
13880 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,

13881 rtx basemem, HOST_WIDE_INT *offsetp)
13883 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13884 offsetp);
13887 rtx
13888 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13889 rtx basemem, HOST_WIDE_INT *offsetp)
13891 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13892 offsetp);
13895 /* Called from a peephole2 expander to turn a sequence of loads into an
13896 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13897 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13898 is true if we can reorder the registers because they are used commutatively
13899 subsequently.
13900 Returns true iff we could generate a new instruction. */
13902 bool
13903 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13905 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13906 rtx mems[MAX_LDM_STM_OPS];
13907 int i, j, base_reg;
13908 rtx base_reg_rtx;
13909 HOST_WIDE_INT offset;
13910 int write_back = FALSE;
13911 int ldm_case;
13912 rtx addr;
13914 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13915 &base_reg, &offset, !sort_regs);
13917 if (ldm_case == 0)
13918 return false;
13920 if (sort_regs)
13921 for (i = 0; i < nops - 1; i++)
13922 for (j = i + 1; j < nops; j++)
13923 if (regs[i] > regs[j])
13925 int t = regs[i];
13926 regs[i] = regs[j];
13927 regs[j] = t;
13929 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13931 if (TARGET_THUMB1)
13933 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13934 gcc_assert (ldm_case == 1 || ldm_case == 5);
13935 write_back = TRUE;
13938 if (ldm_case == 5)
13940 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13941 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13942 offset = 0;
13943 if (!TARGET_THUMB1)
13945 base_reg = regs[0];
13946 base_reg_rtx = newbase;
13950 for (i = 0; i < nops; i++)
13952 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13953 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13954 SImode, addr, 0);
13956 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13957 write_back ? offset + i * 4 : 0));
13958 return true;
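/* Illustrative sketch, standalone: after the loop above the index i equals
   NOPS, so the write-back amount passed to arm_gen_load_multiple_1 is
   offset + 4 * nops, leaving the base register just past the last word
   loaded.  The numbers below are only an example.  */
#include <stdio.h>

int
main (void)
{
  int nops = 4;
  long offset = 0;
  printf ("write-back amount: %ld\n", offset + 4L * nops);   /* prints 16  */
  return 0;
}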
13961 /* Called from a peephole2 expander to turn a sequence of stores into an
13962 STM instruction. OPERANDS are the operands found by the peephole matcher;
13963 NOPS indicates how many separate stores we are trying to combine.
13964 Returns true iff we could generate a new instruction. */
13966 bool
13967 gen_stm_seq (rtx *operands, int nops)
13969 int i;
13970 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13971 rtx mems[MAX_LDM_STM_OPS];
13972 int base_reg;
13973 rtx base_reg_rtx;
13974 HOST_WIDE_INT offset;
13975 int write_back = FALSE;
13976 int stm_case;
13977 rtx addr;
13978 bool base_reg_dies;
13980 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13981 mem_order, &base_reg, &offset, true);
13983 if (stm_case == 0)
13984 return false;
13986 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13988 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13989 if (TARGET_THUMB1)
13991 gcc_assert (base_reg_dies);
13992 write_back = TRUE;
13995 if (stm_case == 5)
13997 gcc_assert (base_reg_dies);
13998 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13999 offset = 0;
14002 addr = plus_constant (Pmode, base_reg_rtx, offset);
14004 for (i = 0; i < nops; i++)
14006 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14007 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14008 SImode, addr, 0);
14010 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14011 write_back ? offset + i * 4 : 0));
14012 return true;
14015 /* Called from a peephole2 expander to turn a sequence of stores that are
14016 preceded by constant loads into an STM instruction. OPERANDS are the
14017 operands found by the peephole matcher; NOPS indicates how many
14018 separate stores we are trying to combine; there are 2 * NOPS
14019 instructions in the peephole.
14020 Returns true iff we could generate a new instruction. */
14022 bool
14023 gen_const_stm_seq (rtx *operands, int nops)
14025 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14026 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14027 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14028 rtx mems[MAX_LDM_STM_OPS];
14029 int base_reg;
14030 rtx base_reg_rtx;
14031 HOST_WIDE_INT offset;
14032 int write_back = FALSE;
14033 int stm_case;
14034 rtx addr;
14035 bool base_reg_dies;
14036 int i, j;
14037 HARD_REG_SET allocated;
14039 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14040 mem_order, &base_reg, &offset, false);
14042 if (stm_case == 0)
14043 return false;
14045 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14047 /* If the same register is used more than once, try to find a free
14048 register. */
14049 CLEAR_HARD_REG_SET (allocated);
14050 for (i = 0; i < nops; i++)
14052 for (j = i + 1; j < nops; j++)
14053 if (regs[i] == regs[j])
14055 rtx t = peep2_find_free_register (0, nops * 2,
14056 TARGET_THUMB1 ? "l" : "r",
14057 SImode, &allocated);
14058 if (t == NULL_RTX)
14059 return false;
14060 reg_rtxs[i] = t;
14061 regs[i] = REGNO (t);
14065 /* Compute an ordering that maps the register numbers to an ascending
14066 sequence. */
14067 reg_order[0] = 0;
14068 for (i = 0; i < nops; i++)
14069 if (regs[i] < regs[reg_order[0]])
14070 reg_order[0] = i;
14072 for (i = 1; i < nops; i++)
14074 int this_order = reg_order[i - 1];
14075 for (j = 0; j < nops; j++)
14076 if (regs[j] > regs[reg_order[i - 1]]
14077 && (this_order == reg_order[i - 1]
14078 || regs[j] < regs[this_order]))
14079 this_order = j;
14080 reg_order[i] = this_order;
14083 /* Ensure that registers that must be live after the instruction end
14084 up with the correct value. */
14085 for (i = 0; i < nops; i++)
14087 int this_order = reg_order[i];
14088 if ((this_order != mem_order[i]
14089 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14090 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14091 return false;
14094 /* Load the constants. */
14095 for (i = 0; i < nops; i++)
14097 rtx op = operands[2 * nops + mem_order[i]];
14098 sorted_regs[i] = regs[reg_order[i]];
14099 emit_move_insn (reg_rtxs[reg_order[i]], op);
14102 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14104 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14105 if (TARGET_THUMB1)
14107 gcc_assert (base_reg_dies);
14108 write_back = TRUE;
14111 if (stm_case == 5)
14113 gcc_assert (base_reg_dies);
14114 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14115 offset = 0;
14118 addr = plus_constant (Pmode, base_reg_rtx, offset);
14120 for (i = 0; i < nops; i++)
14122 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14123 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14124 SImode, addr, 0);
14126 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14127 write_back ? offset + i * 4 : 0));
14128 return true;
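/* Illustrative sketch, standalone: the two reg_order loops above compute, for
   each position, the index of the next-larger register number without
   modifying regs[] (an index-returning selection sort), so that
   regs[reg_order[0]] <= regs[reg_order[1]] <= ...  */
#include <stdio.h>

int
main (void)
{
  int regs[4] = { 5, 2, 7, 3 };
  int reg_order[4];
  int nops = 4;

  reg_order[0] = 0;
  for (int i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (int i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (int j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1] || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  for (int i = 0; i < nops; i++)
    printf ("reg_order[%d] = %d (r%d)\n", i, reg_order[i], regs[reg_order[i]]);
  return 0;
}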
14131 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14132 unaligned copies on processors which support unaligned semantics for those
14133 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14134 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14135 An interleave factor of 1 (the minimum) will perform no interleaving.
14136 Load/store multiple are used for aligned addresses where possible. */
14138 static void
14139 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14140 HOST_WIDE_INT length,
14141 unsigned int interleave_factor)
14143 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14144 int *regnos = XALLOCAVEC (int, interleave_factor);
14145 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14146 HOST_WIDE_INT i, j;
14147 HOST_WIDE_INT remaining = length, words;
14148 rtx halfword_tmp = NULL, byte_tmp = NULL;
14149 rtx dst, src;
14150 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14151 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14152 HOST_WIDE_INT srcoffset, dstoffset;
14153 HOST_WIDE_INT src_autoinc, dst_autoinc;
14154 rtx mem, addr;
14156 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14158 /* Use hard registers if we have aligned source or destination so we can use
14159 load/store multiple with contiguous registers. */
14160 if (dst_aligned || src_aligned)
14161 for (i = 0; i < interleave_factor; i++)
14162 regs[i] = gen_rtx_REG (SImode, i);
14163 else
14164 for (i = 0; i < interleave_factor; i++)
14165 regs[i] = gen_reg_rtx (SImode);
14167 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14168 src = copy_addr_to_reg (XEXP (srcbase, 0));
14170 srcoffset = dstoffset = 0;
14172 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14173 For copying the last bytes we want to subtract this offset again. */
14174 src_autoinc = dst_autoinc = 0;
14176 for (i = 0; i < interleave_factor; i++)
14177 regnos[i] = i;
14179 /* Copy BLOCK_SIZE_BYTES chunks. */
14181 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14183 /* Load words. */
14184 if (src_aligned && interleave_factor > 1)
14186 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14187 TRUE, srcbase, &srcoffset));
14188 src_autoinc += UNITS_PER_WORD * interleave_factor;
14190 else
14192 for (j = 0; j < interleave_factor; j++)
14194 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14195 - src_autoinc));
14196 mem = adjust_automodify_address (srcbase, SImode, addr,
14197 srcoffset + j * UNITS_PER_WORD);
14198 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14200 srcoffset += block_size_bytes;
14203 /* Store words. */
14204 if (dst_aligned && interleave_factor > 1)
14206 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14207 TRUE, dstbase, &dstoffset));
14208 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14210 else
14212 for (j = 0; j < interleave_factor; j++)
14214 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14215 - dst_autoinc));
14216 mem = adjust_automodify_address (dstbase, SImode, addr,
14217 dstoffset + j * UNITS_PER_WORD);
14218 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14220 dstoffset += block_size_bytes;
14223 remaining -= block_size_bytes;
14226 /* Copy any whole words left (note these aren't interleaved with any
14227 subsequent halfword/byte load/stores in the interests of simplicity). */
14229 words = remaining / UNITS_PER_WORD;
14231 gcc_assert (words < interleave_factor);
14233 if (src_aligned && words > 1)
14235 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14236 &srcoffset));
14237 src_autoinc += UNITS_PER_WORD * words;
14239 else
14241 for (j = 0; j < words; j++)
14243 addr = plus_constant (Pmode, src,
14244 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14245 mem = adjust_automodify_address (srcbase, SImode, addr,
14246 srcoffset + j * UNITS_PER_WORD);
14247 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14249 srcoffset += words * UNITS_PER_WORD;
14252 if (dst_aligned && words > 1)
14254 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14255 &dstoffset));
14256 dst_autoinc += words * UNITS_PER_WORD;
14258 else
14260 for (j = 0; j < words; j++)
14262 addr = plus_constant (Pmode, dst,
14263 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14264 mem = adjust_automodify_address (dstbase, SImode, addr,
14265 dstoffset + j * UNITS_PER_WORD);
14266 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14268 dstoffset += words * UNITS_PER_WORD;
14271 remaining -= words * UNITS_PER_WORD;
14273 gcc_assert (remaining < 4);
14275 /* Copy a halfword if necessary. */
14277 if (remaining >= 2)
14279 halfword_tmp = gen_reg_rtx (SImode);
14281 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14282 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14283 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14285 /* Either write out immediately, or delay until we've loaded the last
14286 byte, depending on interleave factor. */
14287 if (interleave_factor == 1)
14289 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14290 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14291 emit_insn (gen_unaligned_storehi (mem,
14292 gen_lowpart (HImode, halfword_tmp)));
14293 halfword_tmp = NULL;
14294 dstoffset += 2;
14297 remaining -= 2;
14298 srcoffset += 2;
14301 gcc_assert (remaining < 2);
14303 /* Copy last byte. */
14305 if ((remaining & 1) != 0)
14307 byte_tmp = gen_reg_rtx (SImode);
14309 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14310 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14311 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14313 if (interleave_factor == 1)
14315 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14316 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14317 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14318 byte_tmp = NULL;
14319 dstoffset++;
14322 remaining--;
14323 srcoffset++;
14326 /* Store last halfword if we haven't done so already. */
14328 if (halfword_tmp)
14330 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14331 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14332 emit_insn (gen_unaligned_storehi (mem,
14333 gen_lowpart (HImode, halfword_tmp)));
14334 dstoffset += 2;
14337 /* Likewise for last byte. */
14339 if (byte_tmp)
14341 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14342 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14343 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14344 dstoffset++;
14347 gcc_assert (remaining == 0 && srcoffset == dstoffset);
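/* Illustrative sketch, standalone: how a copy length decomposes under the
   scheme above, assuming 4-byte words.  Whole blocks of
   interleave_factor * 4 bytes come first, then any remaining whole words,
   then at most one halfword and one byte.  */
#include <stdio.h>

int
main (void)
{
  long length = 23;
  int interleave_factor = 2;
  long block = 4L * interleave_factor;

  long blocks = length / block;
  long rest = length % block;
  long words = rest / 4;
  rest %= 4;
  long halfwords = rest / 2;
  long bytes = rest % 2;

  printf ("%ld bytes: %ld block(s) of %ld, %ld word(s), %ld halfword, %ld byte\n",
          length, blocks, block, words, halfwords, bytes);
  /* 23 bytes: 2 block(s) of 8, 1 word(s), 1 halfword, 1 byte  */
  return 0;
}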
14350 /* From mips_adjust_block_mem:
14352 Helper function for doing a loop-based block operation on memory
14353 reference MEM. Each iteration of the loop will operate on LENGTH
14354 bytes of MEM.
14356 Create a new base register for use within the loop and point it to
14357 the start of MEM. Create a new memory reference that uses this
14358 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14360 static void
14361 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14362 rtx *loop_mem)
14364 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14366 /* Although the new mem does not refer to a known location,
14367 it does keep up to LENGTH bytes of alignment. */
14368 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14369 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14372 /* From mips_block_move_loop:
14374 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14375 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14376 the memory regions do not overlap. */
14378 static void
14379 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14380 unsigned int interleave_factor,
14381 HOST_WIDE_INT bytes_per_iter)
14383 rtx label, src_reg, dest_reg, final_src, test;
14384 HOST_WIDE_INT leftover;
14386 leftover = length % bytes_per_iter;
14387 length -= leftover;
14389 /* Create registers and memory references for use within the loop. */
14390 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14391 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14393 /* Calculate the value that SRC_REG should have after the last iteration of
14394 the loop. */
14395 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14396 0, 0, OPTAB_WIDEN);
14398 /* Emit the start of the loop. */
14399 label = gen_label_rtx ();
14400 emit_label (label);
14402 /* Emit the loop body. */
14403 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14404 interleave_factor);
14406 /* Move on to the next block. */
14407 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14408 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14410 /* Emit the loop condition. */
14411 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14412 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14414 /* Mop up any left-over bytes. */
14415 if (leftover)
14416 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
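/* Illustrative sketch, standalone: the loop above copies
   length - (length % bytes_per_iter) bytes and the trailing straight-line
   copy handles the remainder.  */
#include <stdio.h>

int
main (void)
{
  long length = 100, bytes_per_iter = 16;
  long leftover = length % bytes_per_iter;
  printf ("loop: %ld bytes in %ld iterations, then %ld leftover\n",
          length - leftover, (length - leftover) / bytes_per_iter, leftover);
  /* loop: 96 bytes in 6 iterations, then 4 leftover  */
  return 0;
}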
14419 /* Emit a block move when either the source or destination is unaligned (not
14420 aligned to a four-byte boundary). This may need further tuning depending on
14421 core type, optimize_size setting, etc. */
14423 static int
14424 arm_movmemqi_unaligned (rtx *operands)
14426 HOST_WIDE_INT length = INTVAL (operands[2]);
14428 if (optimize_size)
14430 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14431 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14432 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14433 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14434 or dst_aligned though: allow more interleaving in those cases since the
14435 resulting code can be smaller. */
14436 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14437 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14439 if (length > 12)
14440 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14441 interleave_factor, bytes_per_iter);
14442 else
14443 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14444 interleave_factor);
14446 else
14448 /* Note that the loop created by arm_block_move_unaligned_loop may be
14449 subject to loop unrolling, which makes tuning this condition a little
14450 redundant. */
14451 if (length > 32)
14452 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14453 else
14454 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14457 return 1;
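/* Illustrative sketch, standalone: the parameter choices made above.  When
   optimising for size the cut-over to a loop is 12 bytes, with interleave
   factor 2 and 8 bytes per iteration if either buffer is word-aligned
   (1 and 4 otherwise); when optimising for speed the cut-over is 32 bytes,
   with factor 4 and 16 bytes per iteration.  */
#include <stdbool.h>
#include <stdio.h>

int
main (void)
{
  bool optimize_size = true, src_or_dst_aligned = true;
  long length = 20;

  unsigned interleave = optimize_size ? (src_or_dst_aligned ? 2 : 1) : 4;
  long per_iter = optimize_size ? (src_or_dst_aligned ? 8 : 4) : 16;
  long threshold = optimize_size ? 12 : 32;

  printf ("%s copy, interleave %u, %ld bytes/iter\n",
          length > threshold ? "looped" : "straight", interleave, per_iter);
  /* looped copy, interleave 2, 8 bytes/iter  */
  return 0;
}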
14460 int
14461 arm_gen_movmemqi (rtx *operands)
14463 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14464 HOST_WIDE_INT srcoffset, dstoffset;
14465 int i;
14466 rtx src, dst, srcbase, dstbase;
14467 rtx part_bytes_reg = NULL;
14468 rtx mem;
14470 if (!CONST_INT_P (operands[2])
14471 || !CONST_INT_P (operands[3])
14472 || INTVAL (operands[2]) > 64)
14473 return 0;
14475 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14476 return arm_movmemqi_unaligned (operands);
14478 if (INTVAL (operands[3]) & 3)
14479 return 0;
14481 dstbase = operands[0];
14482 srcbase = operands[1];
14484 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14485 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14487 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14488 out_words_to_go = INTVAL (operands[2]) / 4;
14489 last_bytes = INTVAL (operands[2]) & 3;
14490 dstoffset = srcoffset = 0;
14492 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14493 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14495 for (i = 0; in_words_to_go >= 2; i+=4)
14497 if (in_words_to_go > 4)
14498 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14499 TRUE, srcbase, &srcoffset));
14500 else
14501 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14502 src, FALSE, srcbase,
14503 &srcoffset));
14505 if (out_words_to_go)
14507 if (out_words_to_go > 4)
14508 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14509 TRUE, dstbase, &dstoffset));
14510 else if (out_words_to_go != 1)
14511 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14512 out_words_to_go, dst,
14513 (last_bytes == 0
14514 ? FALSE : TRUE),
14515 dstbase, &dstoffset));
14516 else
14518 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14519 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14520 if (last_bytes != 0)
14522 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14523 dstoffset += 4;
14528 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14529 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14532 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14533 if (out_words_to_go)
14535 rtx sreg;
14537 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14538 sreg = copy_to_reg (mem);
14540 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14541 emit_move_insn (mem, sreg);
14542 in_words_to_go--;
14544 gcc_assert (!in_words_to_go); /* Sanity check */
14547 if (in_words_to_go)
14549 gcc_assert (in_words_to_go > 0);
14551 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14552 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14555 gcc_assert (!last_bytes || part_bytes_reg);
14557 if (BYTES_BIG_ENDIAN && last_bytes)
14559 rtx tmp = gen_reg_rtx (SImode);
14561 /* The bytes we want are in the top end of the word. */
14562 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14563 GEN_INT (8 * (4 - last_bytes))));
14564 part_bytes_reg = tmp;
14566 while (last_bytes)
14568 mem = adjust_automodify_address (dstbase, QImode,
14569 plus_constant (Pmode, dst,
14570 last_bytes - 1),
14571 dstoffset + last_bytes - 1);
14572 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14574 if (--last_bytes)
14576 tmp = gen_reg_rtx (SImode);
14577 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14578 part_bytes_reg = tmp;
14583 else
14585 if (last_bytes > 1)
14587 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14588 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14589 last_bytes -= 2;
14590 if (last_bytes)
14592 rtx tmp = gen_reg_rtx (SImode);
14593 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14594 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14595 part_bytes_reg = tmp;
14596 dstoffset += 2;
14600 if (last_bytes)
14602 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14603 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14607 return 1;
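/* Illustrative sketch, standalone: the split into word transfers and trailing
   bytes used above, and the shift that brings those trailing bytes down from
   the top of the last word on big-endian targets.  */
#include <stdio.h>

int
main (void)
{
  long n = 14;
  long words = n / 4;              /* out_words_to_go  */
  long last_bytes = n & 3;         /* trailing bytes   */
  long be_shift = 8 * (4 - last_bytes);

  printf ("%ld bytes -> %ld words + %ld bytes; big-endian shift %ld bits\n",
          n, words, last_bytes, be_shift);
  /* 14 bytes -> 3 words + 2 bytes; big-endian shift 16 bits  */
  return 0;
}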
14610 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14611 by mode size. */
14612 inline static rtx
14613 next_consecutive_mem (rtx mem)
14615 enum machine_mode mode = GET_MODE (mem);
14616 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14617 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14619 return adjust_automodify_address (mem, mode, addr, offset);
14622 /* Copy using LDRD/STRD instructions whenever possible.
14623 Returns true upon success. */
14624 bool
14625 gen_movmem_ldrd_strd (rtx *operands)
14627 unsigned HOST_WIDE_INT len;
14628 HOST_WIDE_INT align;
14629 rtx src, dst, base;
14630 rtx reg0;
14631 bool src_aligned, dst_aligned;
14632 bool src_volatile, dst_volatile;
14634 gcc_assert (CONST_INT_P (operands[2]));
14635 gcc_assert (CONST_INT_P (operands[3]));
14637 len = UINTVAL (operands[2]);
14638 if (len > 64)
14639 return false;
14641 /* Maximum alignment we can assume for both src and dst buffers. */
14642 align = INTVAL (operands[3]);
14644 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14645 return false;
14647 /* Place src and dst addresses in registers
14648 and update the corresponding mem rtx. */
14649 dst = operands[0];
14650 dst_volatile = MEM_VOLATILE_P (dst);
14651 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14652 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14653 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14655 src = operands[1];
14656 src_volatile = MEM_VOLATILE_P (src);
14657 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14658 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14659 src = adjust_automodify_address (src, VOIDmode, base, 0);
14661 if (!unaligned_access && !(src_aligned && dst_aligned))
14662 return false;
14664 if (src_volatile || dst_volatile)
14665 return false;
14667 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14668 if (!(dst_aligned || src_aligned))
14669 return arm_gen_movmemqi (operands);
14671 src = adjust_address (src, DImode, 0);
14672 dst = adjust_address (dst, DImode, 0);
14673 while (len >= 8)
14675 len -= 8;
14676 reg0 = gen_reg_rtx (DImode);
14677 if (src_aligned)
14678 emit_move_insn (reg0, src);
14679 else
14680 emit_insn (gen_unaligned_loaddi (reg0, src));
14682 if (dst_aligned)
14683 emit_move_insn (dst, reg0);
14684 else
14685 emit_insn (gen_unaligned_storedi (dst, reg0));
14687 src = next_consecutive_mem (src);
14688 dst = next_consecutive_mem (dst);
14691 gcc_assert (len < 8);
14692 if (len >= 4)
14694 /* More than a word but less than a double-word to copy. Copy a word. */
14695 reg0 = gen_reg_rtx (SImode);
14696 src = adjust_address (src, SImode, 0);
14697 dst = adjust_address (dst, SImode, 0);
14698 if (src_aligned)
14699 emit_move_insn (reg0, src);
14700 else
14701 emit_insn (gen_unaligned_loadsi (reg0, src));
14703 if (dst_aligned)
14704 emit_move_insn (dst, reg0);
14705 else
14706 emit_insn (gen_unaligned_storesi (dst, reg0));
14708 src = next_consecutive_mem (src);
14709 dst = next_consecutive_mem (dst);
14710 len -= 4;
14713 if (len == 0)
14714 return true;
14716 /* Copy the remaining bytes. */
14717 if (len >= 2)
14719 dst = adjust_address (dst, HImode, 0);
14720 src = adjust_address (src, HImode, 0);
14721 reg0 = gen_reg_rtx (SImode);
14722 if (src_aligned)
14723 emit_insn (gen_zero_extendhisi2 (reg0, src));
14724 else
14725 emit_insn (gen_unaligned_loadhiu (reg0, src));
14727 if (dst_aligned)
14728 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14729 else
14730 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14732 src = next_consecutive_mem (src);
14733 dst = next_consecutive_mem (dst);
14734 if (len == 2)
14735 return true;
14738 dst = adjust_address (dst, QImode, 0);
14739 src = adjust_address (src, QImode, 0);
14740 reg0 = gen_reg_rtx (QImode);
14741 emit_move_insn (reg0, src);
14742 emit_move_insn (dst, reg0);
14743 return true;
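/* Illustrative sketch, standalone: the copy above proceeds in 8-byte
   (LDRD/STRD), 4-byte, 2-byte and finally 1-byte steps.  */
#include <stdio.h>

int
main (void)
{
  unsigned long len = 27;
  unsigned long dwords = len / 8;
  len %= 8;
  unsigned long word = len >= 4;
  len -= 4 * word;
  unsigned long half = len >= 2;
  len -= 2 * half;
  unsigned long byte = len;

  printf ("%lu doubleword(s), %lu word, %lu halfword, %lu byte\n",
          dwords, word, half, byte);
  /* 3 doubleword(s), 0 word, 1 halfword, 1 byte  */
  return 0;
}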
14746 /* Select a dominance comparison mode if possible for a test of the general
14747 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14748 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14749 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14750 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14751 In all cases OP will be either EQ or NE, but we don't need to know which
14752 here. If we are unable to support a dominance comparison we return
14753 CC mode. This will then fail to match for the RTL expressions that
14754 generate this call. */
14755 enum machine_mode
14756 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14758 enum rtx_code cond1, cond2;
14759 int swapped = 0;
14761 /* Currently we will probably get the wrong result if the individual
14762 comparisons are not simple. This also ensures that it is safe to
14763 reverse a comparison if necessary. */
14764 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14765 != CCmode)
14766 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14767 != CCmode))
14768 return CCmode;
14770 /* The if_then_else variant of this tests the second condition if the
14771 first passes, but is true if the first fails. Reverse the first
14772 condition to get a true "inclusive-or" expression. */
14773 if (cond_or == DOM_CC_NX_OR_Y)
14774 cond1 = reverse_condition (cond1);
14776 /* If the comparisons are not equal, and one doesn't dominate the other,
14777 then we can't do this. */
14778 if (cond1 != cond2
14779 && !comparison_dominates_p (cond1, cond2)
14780 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14781 return CCmode;
14783 if (swapped)
14785 enum rtx_code temp = cond1;
14786 cond1 = cond2;
14787 cond2 = temp;
14790 switch (cond1)
14792 case EQ:
14793 if (cond_or == DOM_CC_X_AND_Y)
14794 return CC_DEQmode;
14796 switch (cond2)
14798 case EQ: return CC_DEQmode;
14799 case LE: return CC_DLEmode;
14800 case LEU: return CC_DLEUmode;
14801 case GE: return CC_DGEmode;
14802 case GEU: return CC_DGEUmode;
14803 default: gcc_unreachable ();
14806 case LT:
14807 if (cond_or == DOM_CC_X_AND_Y)
14808 return CC_DLTmode;
14810 switch (cond2)
14812 case LT:
14813 return CC_DLTmode;
14814 case LE:
14815 return CC_DLEmode;
14816 case NE:
14817 return CC_DNEmode;
14818 default:
14819 gcc_unreachable ();
14822 case GT:
14823 if (cond_or == DOM_CC_X_AND_Y)
14824 return CC_DGTmode;
14826 switch (cond2)
14828 case GT:
14829 return CC_DGTmode;
14830 case GE:
14831 return CC_DGEmode;
14832 case NE:
14833 return CC_DNEmode;
14834 default:
14835 gcc_unreachable ();
14838 case LTU:
14839 if (cond_or == DOM_CC_X_AND_Y)
14840 return CC_DLTUmode;
14842 switch (cond2)
14844 case LTU:
14845 return CC_DLTUmode;
14846 case LEU:
14847 return CC_DLEUmode;
14848 case NE:
14849 return CC_DNEmode;
14850 default:
14851 gcc_unreachable ();
14854 case GTU:
14855 if (cond_or == DOM_CC_X_AND_Y)
14856 return CC_DGTUmode;
14858 switch (cond2)
14860 case GTU:
14861 return CC_DGTUmode;
14862 case GEU:
14863 return CC_DGEUmode;
14864 case NE:
14865 return CC_DNEmode;
14866 default:
14867 gcc_unreachable ();
14870 /* The remaining cases only occur when both comparisons are the
14871 same. */
14872 case NE:
14873 gcc_assert (cond1 == cond2);
14874 return CC_DNEmode;
14876 case LE:
14877 gcc_assert (cond1 == cond2);
14878 return CC_DLEmode;
14880 case GE:
14881 gcc_assert (cond1 == cond2);
14882 return CC_DGEmode;
14884 case LEU:
14885 gcc_assert (cond1 == cond2);
14886 return CC_DLEUmode;
14888 case GEU:
14889 gcc_assert (cond1 == cond2);
14890 return CC_DGEUmode;
14892 default:
14893 gcc_unreachable ();
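/* Illustrative sketch, standalone and much simplified: the dominance idea
   used above.  Two conditions can be merged only when one implies the other;
   the merged CC mode then encodes the weaker condition for an OR and the
   stronger one for an AND.  Only a few signed codes are modelled and the
   names below are local to this sketch.  */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Does cond1 being true imply cond2 is true?  (Tiny subset.)  */
static bool
dominates (const char *cond1, const char *cond2)
{
  if (strcmp (cond1, cond2) == 0)
    return true;
  if (strcmp (cond1, "EQ") == 0)
    return strcmp (cond2, "LE") == 0 || strcmp (cond2, "GE") == 0;
  if (strcmp (cond1, "LT") == 0)
    return strcmp (cond2, "LE") == 0;
  if (strcmp (cond1, "GT") == 0)
    return strcmp (cond2, "GE") == 0;
  return false;
}

int
main (void)
{
  /* (a == b) || (a <= b): EQ dominates LE, so the pair merges (CC_DLEmode).  */
  printf ("EQ || LE mergeable: %d\n", dominates ("EQ", "LE"));
  /* (a == b) || (a < b): neither dominates, so CCmode is returned (reject).  */
  printf ("EQ || LT mergeable: %d\n",
          dominates ("EQ", "LT") || dominates ("LT", "EQ"));
  return 0;
}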
14897 enum machine_mode
14898 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14900 /* All floating point compares return CCFP if it is an equality
14901 comparison, and CCFPE otherwise. */
14902 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14904 switch (op)
14906 case EQ:
14907 case NE:
14908 case UNORDERED:
14909 case ORDERED:
14910 case UNLT:
14911 case UNLE:
14912 case UNGT:
14913 case UNGE:
14914 case UNEQ:
14915 case LTGT:
14916 return CCFPmode;
14918 case LT:
14919 case LE:
14920 case GT:
14921 case GE:
14922 return CCFPEmode;
14924 default:
14925 gcc_unreachable ();
14929 /* A compare with a shifted operand. Because of canonicalization, the
14930 comparison will have to be swapped when we emit the assembler. */
14931 if (GET_MODE (y) == SImode
14932 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14933 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14934 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14935 || GET_CODE (x) == ROTATERT))
14936 return CC_SWPmode;
14938 /* This operation is performed swapped, but since we only rely on the Z
14939 flag we don't need an additional mode. */
14940 if (GET_MODE (y) == SImode
14941 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14942 && GET_CODE (x) == NEG
14943 && (op == EQ || op == NE))
14944 return CC_Zmode;
14946 /* This is a special case that is used by combine to allow a
14947 comparison of a shifted byte load to be split into a zero-extend
14948 followed by a comparison of the shifted integer (only valid for
14949 equalities and unsigned inequalities). */
14950 if (GET_MODE (x) == SImode
14951 && GET_CODE (x) == ASHIFT
14952 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14953 && GET_CODE (XEXP (x, 0)) == SUBREG
14954 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14955 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14956 && (op == EQ || op == NE
14957 || op == GEU || op == GTU || op == LTU || op == LEU)
14958 && CONST_INT_P (y))
14959 return CC_Zmode;
14961 /* A construct for a conditional compare, if the false arm contains
14962 0, then both conditions must be true, otherwise either condition
14963 must be true. Not all conditions are possible, so CCmode is
14964 returned if it can't be done. */
14965 if (GET_CODE (x) == IF_THEN_ELSE
14966 && (XEXP (x, 2) == const0_rtx
14967 || XEXP (x, 2) == const1_rtx)
14968 && COMPARISON_P (XEXP (x, 0))
14969 && COMPARISON_P (XEXP (x, 1)))
14970 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14971 INTVAL (XEXP (x, 2)));
14973 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14974 if (GET_CODE (x) == AND
14975 && (op == EQ || op == NE)
14976 && COMPARISON_P (XEXP (x, 0))
14977 && COMPARISON_P (XEXP (x, 1)))
14978 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14979 DOM_CC_X_AND_Y);
14981 if (GET_CODE (x) == IOR
14982 && (op == EQ || op == NE)
14983 && COMPARISON_P (XEXP (x, 0))
14984 && COMPARISON_P (XEXP (x, 1)))
14985 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14986 DOM_CC_X_OR_Y);
14988 /* An operation (on Thumb) where we want to test for a single bit.
14989 This is done by shifting that bit up into the top bit of a
14990 scratch register; we can then branch on the sign bit. */
14991 if (TARGET_THUMB1
14992 && GET_MODE (x) == SImode
14993 && (op == EQ || op == NE)
14994 && GET_CODE (x) == ZERO_EXTRACT
14995 && XEXP (x, 1) == const1_rtx)
14996 return CC_Nmode;
14998 /* An operation that sets the condition codes as a side-effect, the
14999 V flag is not set correctly, so we can only use comparisons where
15000 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15001 instead.) */
15002 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15003 if (GET_MODE (x) == SImode
15004 && y == const0_rtx
15005 && (op == EQ || op == NE || op == LT || op == GE)
15006 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15007 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15008 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15009 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15010 || GET_CODE (x) == LSHIFTRT
15011 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15012 || GET_CODE (x) == ROTATERT
15013 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15014 return CC_NOOVmode;
15016 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15017 return CC_Zmode;
15019 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15020 && GET_CODE (x) == PLUS
15021 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15022 return CC_Cmode;
15024 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15026 switch (op)
15028 case EQ:
15029 case NE:
15030 /* A DImode comparison against zero can be implemented by
15031 or'ing the two halves together. */
15032 if (y == const0_rtx)
15033 return CC_Zmode;
15035 /* We can do an equality test in three Thumb instructions. */
15036 if (!TARGET_32BIT)
15037 return CC_Zmode;
15039 /* FALLTHROUGH */
15041 case LTU:
15042 case LEU:
15043 case GTU:
15044 case GEU:
15045 /* DImode unsigned comparisons can be implemented by cmp +
15046 cmpeq without a scratch register. Not worth doing in
15047 Thumb-2. */
15048 if (TARGET_32BIT)
15049 return CC_CZmode;
15051 /* FALLTHROUGH */
15053 case LT:
15054 case LE:
15055 case GT:
15056 case GE:
15057 /* DImode signed and unsigned comparisons can be implemented
15058 by cmp + sbcs with a scratch register, but that does not
15059 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15060 gcc_assert (op != EQ && op != NE);
15061 return CC_NCVmode;
15063 default:
15064 gcc_unreachable ();
15068 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15069 return GET_MODE (x);
15071 return CCmode;
15074 /* X and Y are two things to compare using CODE. Emit the compare insn and
15075 return the rtx for register 0 in the proper mode. FP means this is a
15076 floating point compare: I don't think that it is needed on the arm. */
15077 rtx
15078 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15080 enum machine_mode mode;
15081 rtx cc_reg;
15082 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15084 /* We might have X as a constant, Y as a register because of the predicates
15085 used for cmpdi. If so, force X to a register here. */
15086 if (dimode_comparison && !REG_P (x))
15087 x = force_reg (DImode, x);
15089 mode = SELECT_CC_MODE (code, x, y);
15090 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15092 if (dimode_comparison
15093 && mode != CC_CZmode)
15095 rtx clobber, set;
15097 /* To compare two non-zero values for equality, XOR them and
15098 then compare against zero. Not used for ARM mode; there
15099 CC_CZmode is cheaper. */
15100 if (mode == CC_Zmode && y != const0_rtx)
15102 gcc_assert (!reload_completed);
15103 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15104 y = const0_rtx;
15107 /* A scratch register is required. */
15108 if (reload_completed)
15109 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15110 else
15111 scratch = gen_rtx_SCRATCH (SImode);
15113 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15114 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15115 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15117 else
15118 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15120 return cc_reg;
15123 /* Generate a sequence of insns that will generate the correct return
15124 address mask depending on the physical architecture that the program
15125 is running on. */
15126 rtx
15127 arm_gen_return_addr_mask (void)
15129 rtx reg = gen_reg_rtx (Pmode);
15131 emit_insn (gen_return_addr_mask (reg));
15132 return reg;
15135 void
15136 arm_reload_in_hi (rtx *operands)
15138 rtx ref = operands[1];
15139 rtx base, scratch;
15140 HOST_WIDE_INT offset = 0;
15142 if (GET_CODE (ref) == SUBREG)
15144 offset = SUBREG_BYTE (ref);
15145 ref = SUBREG_REG (ref);
15148 if (REG_P (ref))
15150 /* We have a pseudo which has been spilt onto the stack; there
15151 are two cases here: the first where there is a simple
15152 stack-slot replacement and a second where the stack-slot is
15153 out of range, or is used as a subreg. */
15154 if (reg_equiv_mem (REGNO (ref)))
15156 ref = reg_equiv_mem (REGNO (ref));
15157 base = find_replacement (&XEXP (ref, 0));
15159 else
15160 /* The slot is out of range, or was dressed up in a SUBREG. */
15161 base = reg_equiv_address (REGNO (ref));
15163 else
15164 base = find_replacement (&XEXP (ref, 0));
15166 /* Handle the case where the address is too complex to be offset by 1. */
15167 if (GET_CODE (base) == MINUS
15168 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15170 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15172 emit_set_insn (base_plus, base);
15173 base = base_plus;
15175 else if (GET_CODE (base) == PLUS)
15177 /* The addend must be CONST_INT, or we would have dealt with it above. */
15178 HOST_WIDE_INT hi, lo;
15180 offset += INTVAL (XEXP (base, 1));
15181 base = XEXP (base, 0);
15183 /* Rework the address into a legal sequence of insns. */
15184 /* Valid range for lo is -4095 -> 4095 */
15185 lo = (offset >= 0
15186 ? (offset & 0xfff)
15187 : -((-offset) & 0xfff));
15189 /* Corner case, if lo is the max offset then we would be out of range
15190 once we have added the additional 1 below, so bump the msb into the
15191 pre-loading insn(s). */
15192 if (lo == 4095)
15193 lo &= 0x7ff;
15195 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15196 ^ (HOST_WIDE_INT) 0x80000000)
15197 - (HOST_WIDE_INT) 0x80000000);
15199 gcc_assert (hi + lo == offset);
15201 if (hi != 0)
15203 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15205 /* Get the base address; addsi3 knows how to handle constants
15206 that require more than one insn. */
15207 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15208 base = base_plus;
15209 offset = lo;
15213 /* Operands[2] may overlap operands[0] (though it won't overlap
15214 operands[1]), that's why we asked for a DImode reg -- so we can
15215 use the bit that does not overlap. */
15216 if (REGNO (operands[2]) == REGNO (operands[0]))
15217 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15218 else
15219 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15221 emit_insn (gen_zero_extendqisi2 (scratch,
15222 gen_rtx_MEM (QImode,
15223 plus_constant (Pmode, base,
15224 offset))));
15225 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15226 gen_rtx_MEM (QImode,
15227 plus_constant (Pmode, base,
15228 offset + 1))));
15229 if (!BYTES_BIG_ENDIAN)
15230 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15231 gen_rtx_IOR (SImode,
15232 gen_rtx_ASHIFT
15233 (SImode,
15234 gen_rtx_SUBREG (SImode, operands[0], 0),
15235 GEN_INT (8)),
15236 scratch));
15237 else
15238 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15239 gen_rtx_IOR (SImode,
15240 gen_rtx_ASHIFT (SImode, scratch,
15241 GEN_INT (8)),
15242 gen_rtx_SUBREG (SImode, operands[0], 0)));
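/* Illustrative sketch, standalone: the lo/hi split used above.  lo keeps the
   low 12 bits (negated towards zero for negative offsets), hi is the rest,
   and lo is trimmed when it is exactly 4095 so that the +1 for the second
   byte cannot push the address out of range.  */
#include <stdio.h>

int
main (void)
{
  long long offsets[] = { 4200, -4200, 4095 };
  for (int i = 0; i < 3; i++)
    {
      long long offset = offsets[i];
      long long lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
      if (lo == 4095)
        lo &= 0x7ff;
      long long hi = (((offset - lo) & 0xffffffffLL) ^ 0x80000000LL)
                     - 0x80000000LL;
      printf ("offset %lld -> hi %lld + lo %lld\n", offset, hi, lo);
    }
  /* 4200 -> 4096 + 104;  -4200 -> -4096 + -104;  4095 -> 2048 + 2047  */
  return 0;
}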
15245 /* Handle storing a half-word to memory during reload by synthesizing as two
15246 byte stores. Take care not to clobber the input values until after we
15247 have moved them somewhere safe. This code assumes that if the DImode
15248 scratch in operands[2] overlaps either the input value or output address
15249 in some way, then that value must die in this insn (we absolutely need
15250 two scratch registers for some corner cases). */
15251 void
15252 arm_reload_out_hi (rtx *operands)
15254 rtx ref = operands[0];
15255 rtx outval = operands[1];
15256 rtx base, scratch;
15257 HOST_WIDE_INT offset = 0;
15259 if (GET_CODE (ref) == SUBREG)
15261 offset = SUBREG_BYTE (ref);
15262 ref = SUBREG_REG (ref);
15265 if (REG_P (ref))
15267 /* We have a pseudo which has been spilt onto the stack; there
15268 are two cases here: the first where there is a simple
15269 stack-slot replacement and a second where the stack-slot is
15270 out of range, or is used as a subreg. */
15271 if (reg_equiv_mem (REGNO (ref)))
15273 ref = reg_equiv_mem (REGNO (ref));
15274 base = find_replacement (&XEXP (ref, 0));
15276 else
15277 /* The slot is out of range, or was dressed up in a SUBREG. */
15278 base = reg_equiv_address (REGNO (ref));
15280 else
15281 base = find_replacement (&XEXP (ref, 0));
15283 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15285 /* Handle the case where the address is too complex to be offset by 1. */
15286 if (GET_CODE (base) == MINUS
15287 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15289 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15291 /* Be careful not to destroy OUTVAL. */
15292 if (reg_overlap_mentioned_p (base_plus, outval))
15294 /* Updating base_plus might destroy outval, see if we can
15295 swap the scratch and base_plus. */
15296 if (!reg_overlap_mentioned_p (scratch, outval))
15298 rtx tmp = scratch;
15299 scratch = base_plus;
15300 base_plus = tmp;
15302 else
15304 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15306 /* Be conservative and copy OUTVAL into the scratch now,
15307 this should only be necessary if outval is a subreg
15308 of something larger than a word. */
15309 /* XXX Might this clobber base? I can't see how it can,
15310 since scratch is known to overlap with OUTVAL, and
15311 must be wider than a word. */
15312 emit_insn (gen_movhi (scratch_hi, outval));
15313 outval = scratch_hi;
15317 emit_set_insn (base_plus, base);
15318 base = base_plus;
15320 else if (GET_CODE (base) == PLUS)
15322 /* The addend must be CONST_INT, or we would have dealt with it above. */
15323 HOST_WIDE_INT hi, lo;
15325 offset += INTVAL (XEXP (base, 1));
15326 base = XEXP (base, 0);
15328 /* Rework the address into a legal sequence of insns. */
15329 /* Valid range for lo is -4095 -> 4095 */
15330 lo = (offset >= 0
15331 ? (offset & 0xfff)
15332 : -((-offset) & 0xfff));
15334 /* Corner case, if lo is the max offset then we would be out of range
15335 once we have added the additional 1 below, so bump the msb into the
15336 pre-loading insn(s). */
15337 if (lo == 4095)
15338 lo &= 0x7ff;
15340 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15341 ^ (HOST_WIDE_INT) 0x80000000)
15342 - (HOST_WIDE_INT) 0x80000000);
15344 gcc_assert (hi + lo == offset);
15346 if (hi != 0)
15348 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15350 /* Be careful not to destroy OUTVAL. */
15351 if (reg_overlap_mentioned_p (base_plus, outval))
15353 /* Updating base_plus might destroy outval, see if we
15354 can swap the scratch and base_plus. */
15355 if (!reg_overlap_mentioned_p (scratch, outval))
15357 rtx tmp = scratch;
15358 scratch = base_plus;
15359 base_plus = tmp;
15361 else
15363 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15365 /* Be conservative and copy outval into scratch now,
15366 this should only be necessary if outval is a
15367 subreg of something larger than a word. */
15368 /* XXX Might this clobber base? I can't see how it
15369 can, since scratch is known to overlap with
15370 outval. */
15371 emit_insn (gen_movhi (scratch_hi, outval));
15372 outval = scratch_hi;
15376 /* Get the base address; addsi3 knows how to handle constants
15377 that require more than one insn. */
15378 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15379 base = base_plus;
15380 offset = lo;
15384 if (BYTES_BIG_ENDIAN)
15386 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15387 plus_constant (Pmode, base,
15388 offset + 1)),
15389 gen_lowpart (QImode, outval)));
15390 emit_insn (gen_lshrsi3 (scratch,
15391 gen_rtx_SUBREG (SImode, outval, 0),
15392 GEN_INT (8)));
15393 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15394 offset)),
15395 gen_lowpart (QImode, scratch)));
15397 else
15399 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15400 offset)),
15401 gen_lowpart (QImode, outval)));
15402 emit_insn (gen_lshrsi3 (scratch,
15403 gen_rtx_SUBREG (SImode, outval, 0),
15404 GEN_INT (8)));
15405 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15406 plus_constant (Pmode, base,
15407 offset + 1)),
15408 gen_lowpart (QImode, scratch)));
15412 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15413 (padded to the size of a word) should be passed in a register. */
15415 static bool
15416 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15418 if (TARGET_AAPCS_BASED)
15419 return must_pass_in_stack_var_size (mode, type);
15420 else
15421 return must_pass_in_stack_var_size_or_pad (mode, type);
15425 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15426 Return true if an argument passed on the stack should be padded upwards,
15427 i.e. if the least-significant byte has useful data.
15428 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15429 aggregate types are placed in the lowest memory address. */
15431 bool
15432 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15434 if (!TARGET_AAPCS_BASED)
15435 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15437 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15438 return false;
15440 return true;
15444 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15445 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15446 register has useful data, and return the opposite if the most
15447 significant byte does. */
15449 bool
15450 arm_pad_reg_upward (enum machine_mode mode,
15451 tree type, int first ATTRIBUTE_UNUSED)
15453 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15455 /* For AAPCS, small aggregates, small fixed-point types,
15456 and small complex types are always padded upwards. */
15457 if (type)
15459 if ((AGGREGATE_TYPE_P (type)
15460 || TREE_CODE (type) == COMPLEX_TYPE
15461 || FIXED_POINT_TYPE_P (type))
15462 && int_size_in_bytes (type) <= 4)
15463 return true;
15465 else
15467 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15468 && GET_MODE_SIZE (mode) <= 4)
15469 return true;
15473 /* Otherwise, use default padding. */
15474 return !BYTES_BIG_ENDIAN;
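/* Concretely (illustrative): on a big-endian AAPCS target a 3-byte
   structure held in a register occupies the most significant bytes of
   that register, so a store puts its first byte at the lowest address,
   while a 'short' falls through to the default and lives in the least
   significant bits like any other widened integer.  */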
15477 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15478 assuming that the address in the base register is word aligned. */
15479 bool
15480 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15482 HOST_WIDE_INT max_offset;
15484 /* Offset must be a multiple of 4 in Thumb mode. */
15485 if (TARGET_THUMB2 && ((offset & 3) != 0))
15486 return false;
15488 if (TARGET_THUMB2)
15489 max_offset = 1020;
15490 else if (TARGET_ARM)
15491 max_offset = 255;
15492 else
15493 return false;
15495 return ((offset <= max_offset) && (offset >= -max_offset));
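/* For example, an offset of 256 is accepted for Thumb-2 (a multiple of 4
   within +/-1020) but rejected for ARM-mode LDRD/STRD, whose immediate
   is limited to +/-255; conversely, 254 is fine in ARM mode but rejected
   for Thumb-2 because it is not word-aligned.  */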
15498 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15499 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15500 Assumes that the address in the base register RN is word aligned. Pattern
15501 guarantees that both memory accesses use the same base register,
15502 the offsets are constants within the range, and the gap between the offsets is 4.
15503 If reload is complete then check that the registers are legal. WBACK indicates whether
15504 address is updated. LOAD indicates whether memory access is load or store. */
15505 bool
15506 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15507 bool wback, bool load)
15509 unsigned int t, t2, n;
15511 if (!reload_completed)
15512 return true;
15514 if (!offset_ok_for_ldrd_strd (offset))
15515 return false;
15517 t = REGNO (rt);
15518 t2 = REGNO (rt2);
15519 n = REGNO (rn);
15521 if ((TARGET_THUMB2)
15522 && ((wback && (n == t || n == t2))
15523 || (t == SP_REGNUM)
15524 || (t == PC_REGNUM)
15525 || (t2 == SP_REGNUM)
15526 || (t2 == PC_REGNUM)
15527 || (!load && (n == PC_REGNUM))
15528 || (load && (t == t2))
15529 /* Triggers Cortex-M3 LDRD errata. */
15530 || (!wback && load && fix_cm3_ldrd && (n == t))))
15531 return false;
15533 if ((TARGET_ARM)
15534 && ((wback && (n == t || n == t2))
15535 || (t2 == PC_REGNUM)
15536 || (t % 2 != 0) /* First destination register is not even. */
15537 || (t2 != t + 1)
15538 /* PC can be used as base register (for offset addressing only),
15539 but it is deprecated. */
15540 || (n == PC_REGNUM)))
15541 return false;
15543 return true;
15546 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15547 operand MEM's address contains an immediate offset from the base
15548 register and has no side effects, in which case it sets BASE and
15549 OFFSET accordingly. */
15550 static bool
15551 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15553 rtx addr;
15555 gcc_assert (base != NULL && offset != NULL);
15557 /* TODO: Handle more general memory operand patterns, such as
15558 PRE_DEC and PRE_INC. */
15560 if (side_effects_p (mem))
15561 return false;
15563 /* Can't deal with subregs. */
15564 if (GET_CODE (mem) == SUBREG)
15565 return false;
15567 gcc_assert (MEM_P (mem));
15569 *offset = const0_rtx;
15571 addr = XEXP (mem, 0);
15573 /* If addr isn't valid for DImode, then we can't handle it. */
15574 if (!arm_legitimate_address_p (DImode, addr,
15575 reload_in_progress || reload_completed))
15576 return false;
15578 if (REG_P (addr))
15580 *base = addr;
15581 return true;
15583 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15585 *base = XEXP (addr, 0);
15586 *offset = XEXP (addr, 1);
15587 return (REG_P (*base) && CONST_INT_P (*offset));
15590 return false;
15593 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15595 /* Called from a peephole2 to replace two word-size accesses with a
15596 single LDRD/STRD instruction. Returns true iff we can generate a
15597 new instruction sequence. That is, both accesses use the same base
15598 register and the gap between constant offsets is 4. This function
15599 may reorder its operands to match ldrd/strd RTL templates.
15600 OPERANDS are the operands found by the peephole matcher;
15601 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15602 corresponding memory operands. LOAD indicates whether the access
15603 is load or store. CONST_STORE indicates a store of constant
15604 integer values held in OPERANDS[4,5] and assumes that the pattern
15605 is 4 insns long, for the purpose of checking dead registers.
15606 COMMUTE indicates that register operands may be reordered. */
15607 bool
15608 gen_operands_ldrd_strd (rtx *operands, bool load,
15609 bool const_store, bool commute)
15611 int nops = 2;
15612 HOST_WIDE_INT offsets[2], offset;
15613 rtx base = NULL_RTX;
15614 rtx cur_base, cur_offset, tmp;
15615 int i, gap;
15616 HARD_REG_SET regset;
15618 gcc_assert (!const_store || !load);
15619 /* Check that the memory references are immediate offsets from the
15620 same base register. Extract the base register, the destination
15621 registers, and the corresponding memory offsets. */
15622 for (i = 0; i < nops; i++)
15624 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15625 return false;
15627 if (i == 0)
15628 base = cur_base;
15629 else if (REGNO (base) != REGNO (cur_base))
15630 return false;
15632 offsets[i] = INTVAL (cur_offset);
15633 if (GET_CODE (operands[i]) == SUBREG)
15635 tmp = SUBREG_REG (operands[i]);
15636 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15637 operands[i] = tmp;
15641 /* Make sure there is no dependency between the individual loads. */
15642 if (load && REGNO (operands[0]) == REGNO (base))
15643 return false; /* RAW */
15645 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15646 return false; /* WAW */
15648 /* If the same input register is used in both stores
15649 when storing different constants, try to find a free register.
15650 For example, the code
15651 mov r0, 0
15652 str r0, [r2]
15653 mov r0, 1
15654 str r0, [r2, #4]
15655 can be transformed into
15656 mov r1, 0
15657 strd r1, r0, [r2]
15658 in Thumb mode assuming that r1 is free. */
15659 if (const_store
15660 && REGNO (operands[0]) == REGNO (operands[1])
15661 && INTVAL (operands[4]) != INTVAL (operands[5]))
15663 if (TARGET_THUMB2)
15665 CLEAR_HARD_REG_SET (regset);
15666 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15667 if (tmp == NULL_RTX)
15668 return false;
15670 /* Use the new register in the first load to ensure that
15671 if the original input register is not dead after peephole,
15672 then it will have the correct constant value. */
15673 operands[0] = tmp;
15675 else if (TARGET_ARM)
15677 return false;
15678 int regno = REGNO (operands[0]);
15679 if (!peep2_reg_dead_p (4, operands[0]))
15681 /* When the input register is even and is not dead after the
15682 pattern, it has to hold the second constant but we cannot
15683 form a legal STRD in ARM mode with this register as the second
15684 register. */
15685 if (regno % 2 == 0)
15686 return false;
15688 /* Is regno-1 free? */
15689 SET_HARD_REG_SET (regset);
15690 CLEAR_HARD_REG_BIT(regset, regno - 1);
15691 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15692 if (tmp == NULL_RTX)
15693 return false;
15695 operands[0] = tmp;
15697 else
15699 /* Find a DImode register. */
15700 CLEAR_HARD_REG_SET (regset);
15701 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15702 if (tmp != NULL_RTX)
15704 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15705 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15707 else
15709 /* Can we use the input register to form a DI register? */
15710 SET_HARD_REG_SET (regset);
15711 CLEAR_HARD_REG_BIT(regset,
15712 regno % 2 == 0 ? regno + 1 : regno - 1);
15713 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15714 if (tmp == NULL_RTX)
15715 return false;
15716 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15720 gcc_assert (operands[0] != NULL_RTX);
15721 gcc_assert (operands[1] != NULL_RTX);
15722 gcc_assert (REGNO (operands[0]) % 2 == 0);
15723 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15727 /* Make sure the instructions are ordered with lower memory access first. */
15728 if (offsets[0] > offsets[1])
15730 gap = offsets[0] - offsets[1];
15731 offset = offsets[1];
15733 /* Swap the instructions such that lower memory is accessed first. */
15734 SWAP_RTX (operands[0], operands[1]);
15735 SWAP_RTX (operands[2], operands[3]);
15736 if (const_store)
15737 SWAP_RTX (operands[4], operands[5]);
15739 else
15741 gap = offsets[1] - offsets[0];
15742 offset = offsets[0];
15745 /* Make sure accesses are to consecutive memory locations. */
15746 if (gap != 4)
15747 return false;
15749 /* Make sure we generate legal instructions. */
15750 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15751 false, load))
15752 return true;
15754 /* In Thumb state, where registers are almost unconstrained, there
15755 is little hope to fix it. */
15756 if (TARGET_THUMB2)
15757 return false;
15759 if (load && commute)
15761 /* Try reordering registers. */
15762 SWAP_RTX (operands[0], operands[1]);
15763 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15764 false, load))
15765 return true;
15768 if (const_store)
15770 /* If input registers are dead after this pattern, they can be
15771 reordered or replaced by other registers that are free in the
15772 current pattern. */
15773 if (!peep2_reg_dead_p (4, operands[0])
15774 || !peep2_reg_dead_p (4, operands[1]))
15775 return false;
15777 /* Try to reorder the input registers. */
15778 /* For example, the code
15779 mov r0, 0
15780 mov r1, 1
15781 str r1, [r2]
15782 str r0, [r2, #4]
15783 can be transformed into
15784 mov r1, 0
15785 mov r0, 1
15786 strd r0, [r2]
15788 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15789 false, false))
15791 SWAP_RTX (operands[0], operands[1]);
15792 return true;
15795 /* Try to find a free DI register. */
15796 CLEAR_HARD_REG_SET (regset);
15797 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15798 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15799 while (true)
15801 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15802 if (tmp == NULL_RTX)
15803 return false;
15805 /* DREG must be an even-numbered register in DImode.
15806 Split it into SI registers. */
15807 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15808 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15809 gcc_assert (operands[0] != NULL_RTX);
15810 gcc_assert (operands[1] != NULL_RTX);
15811 gcc_assert (REGNO (operands[0]) % 2 == 0);
15812 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15814 return (operands_ok_ldrd_strd (operands[0], operands[1],
15815 base, offset,
15816 false, load));
15820 return false;
15822 #undef SWAP_RTX
15827 /* Print a symbolic form of X to the debug file, F. */
15828 static void
15829 arm_print_value (FILE *f, rtx x)
15831 switch (GET_CODE (x))
15833 case CONST_INT:
15834 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15835 return;
15837 case CONST_DOUBLE:
15838 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15839 return;
15841 case CONST_VECTOR:
15843 int i;
15845 fprintf (f, "<");
15846 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15848 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15849 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15850 fputc (',', f);
15852 fprintf (f, ">");
15854 return;
15856 case CONST_STRING:
15857 fprintf (f, "\"%s\"", XSTR (x, 0));
15858 return;
15860 case SYMBOL_REF:
15861 fprintf (f, "`%s'", XSTR (x, 0));
15862 return;
15864 case LABEL_REF:
15865 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15866 return;
15868 case CONST:
15869 arm_print_value (f, XEXP (x, 0));
15870 return;
15872 case PLUS:
15873 arm_print_value (f, XEXP (x, 0));
15874 fprintf (f, "+");
15875 arm_print_value (f, XEXP (x, 1));
15876 return;
15878 case PC:
15879 fprintf (f, "pc");
15880 return;
15882 default:
15883 fprintf (f, "????");
15884 return;
15888 /* Routines for manipulation of the constant pool. */
15890 /* Arm instructions cannot load a large constant directly into a
15891 register; they have to come from a pc relative load. The constant
15892 must therefore be placed in the addressable range of the pc
15893 relative load. Depending on the precise pc relative load
15894 instruction the range is somewhere between 256 bytes and 4k. This
15895 means that we often have to dump a constant inside a function, and
15896 generate code to branch around it.
15898 It is important to minimize this, since the branches will slow
15899 things down and make the code larger.
15901 Normally we can hide the table after an existing unconditional
15902 branch so that there is no interruption of the flow, but in the
15903 worst case the code looks like this:
15905 ldr rn, L1
15907 b L2
15908 align
15909 L1: .long value
15913 ldr rn, L3
15915 b L4
15916 align
15917 L3: .long value
15921 We fix this by performing a scan after scheduling, which notices
15922 which instructions need to have their operands fetched from the
15923 constant table and builds the table.
15925 The algorithm starts by building a table of all the constants that
15926 need fixing up and all the natural barriers in the function (places
15927 where a constant table can be dropped without breaking the flow).
15928 For each fixup we note how far the pc-relative replacement will be
15929 able to reach and the offset of the instruction into the function.
15931 Having built the table we then group the fixes together to form
15932 tables that are as large as possible (subject to addressing
15933 constraints) and emit each table of constants after the last
15934 barrier that is within range of all the instructions in the group.
15935 If a group does not contain a barrier, then we forcibly create one
15936 by inserting a jump instruction into the flow. Once the table has
15937 been inserted, the insns are then modified to reference the
15938 relevant entry in the pool.
15940 Possible enhancements to the algorithm (not implemented) are:
15942 1) For some processors and object formats, there may be benefit in
15943 aligning the pools to the start of cache lines; this alignment
15944 would need to be taken into account when calculating addressability
15945 of a pool. */
15947 /* These typedefs are located at the start of this file, so that
15948 they can be used in the prototypes there. This comment is to
15949 remind readers of that fact so that the following structures
15950 can be understood more easily.
15952 typedef struct minipool_node Mnode;
15953 typedef struct minipool_fixup Mfix; */
15955 struct minipool_node
15957 /* Doubly linked chain of entries. */
15958 Mnode * next;
15959 Mnode * prev;
15960 /* The maximum offset into the code at which this entry can be placed. While
15961 pushing fixes for forward references, all entries are sorted in order
15962 of increasing max_address. */
15963 HOST_WIDE_INT max_address;
15964 /* Similarly for an entry inserted for a backwards ref. */
15965 HOST_WIDE_INT min_address;
15966 /* The number of fixes referencing this entry. This can become zero
15967 if we "unpush" an entry. In this case we ignore the entry when we
15968 come to emit the code. */
15969 int refcount;
15970 /* The offset from the start of the minipool. */
15971 HOST_WIDE_INT offset;
15972 /* The value in table. */
15973 rtx value;
15974 /* The mode of value. */
15975 enum machine_mode mode;
15976 /* The size of the value. With iWMMXt enabled
15977 sizes > 4 also imply an alignment of 8 bytes. */
15978 int fix_size;
15981 struct minipool_fixup
15983 Mfix * next;
15984 rtx insn;
15985 HOST_WIDE_INT address;
15986 rtx * loc;
15987 enum machine_mode mode;
15988 int fix_size;
15989 rtx value;
15990 Mnode * minipool;
15991 HOST_WIDE_INT forwards;
15992 HOST_WIDE_INT backwards;
15995 /* Fixes less than a word need padding out to a word boundary. */
15996 #define MINIPOOL_FIX_SIZE(mode) \
15997 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
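/* E.g. an HImode constant (2 bytes) still occupies a 4-byte slot in the
   pool, while DImode or vector constants keep their natural size.  */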
15999 static Mnode * minipool_vector_head;
16000 static Mnode * minipool_vector_tail;
16001 static rtx minipool_vector_label;
16002 static int minipool_pad;
16004 /* The linked list of all minipool fixes required for this function. */
16005 Mfix * minipool_fix_head;
16006 Mfix * minipool_fix_tail;
16007 /* The fix entry for the current minipool, once it has been placed. */
16008 Mfix * minipool_barrier;
16010 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16011 #define JUMP_TABLES_IN_TEXT_SECTION 0
16012 #endif
16014 static HOST_WIDE_INT
16015 get_jump_table_size (rtx insn)
16017 /* ADDR_VECs only take room if read-only data goes into the text
16018 section. */
16019 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16021 rtx body = PATTERN (insn);
16022 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16023 HOST_WIDE_INT size;
16024 HOST_WIDE_INT modesize;
16026 modesize = GET_MODE_SIZE (GET_MODE (body));
16027 size = modesize * XVECLEN (body, elt);
16028 switch (modesize)
16030 case 1:
16031 /* Round up size of TBB table to a halfword boundary. */
16032 size = (size + 1) & ~(HOST_WIDE_INT)1;
16033 break;
16034 case 2:
16035 /* No padding necessary for TBH. */
16036 break;
16037 case 4:
16038 /* Add two bytes for alignment on Thumb. */
16039 if (TARGET_THUMB)
16040 size += 2;
16041 break;
16042 default:
16043 gcc_unreachable ();
16045 return size;
16048 return 0;
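/* Illustration: a TBB-style ADDR_DIFF_VEC with 5 QImode entries occupies
   5 bytes and is rounded up to 6, while a 5-entry SImode table takes 20
   bytes plus 2 bytes of alignment padding when compiling for Thumb.  */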
16051 /* Return the maximum amount of padding that will be inserted before
16052 label LABEL. */
16054 static HOST_WIDE_INT
16055 get_label_padding (rtx label)
16057 HOST_WIDE_INT align, min_insn_size;
16059 align = 1 << label_to_alignment (label);
16060 min_insn_size = TARGET_THUMB ? 2 : 4;
16061 return align > min_insn_size ? align - min_insn_size : 0;
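/* E.g. a label aligned to 8 bytes can be preceded by up to 6 bytes of
   padding in Thumb state (minimum insn size 2) and up to 4 bytes in ARM
   state (minimum insn size 4).  */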
16064 /* Move a minipool fix MP from its current location to before MAX_MP.
16065 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16066 constraints may need updating. */
16067 static Mnode *
16068 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16069 HOST_WIDE_INT max_address)
16071 /* The code below assumes these are different. */
16072 gcc_assert (mp != max_mp);
16074 if (max_mp == NULL)
16076 if (max_address < mp->max_address)
16077 mp->max_address = max_address;
16079 else
16081 if (max_address > max_mp->max_address - mp->fix_size)
16082 mp->max_address = max_mp->max_address - mp->fix_size;
16083 else
16084 mp->max_address = max_address;
16086 /* Unlink MP from its current position. Since max_mp is non-null,
16087 mp->prev must be non-null. */
16088 mp->prev->next = mp->next;
16089 if (mp->next != NULL)
16090 mp->next->prev = mp->prev;
16091 else
16092 minipool_vector_tail = mp->prev;
16094 /* Re-insert it before MAX_MP. */
16095 mp->next = max_mp;
16096 mp->prev = max_mp->prev;
16097 max_mp->prev = mp;
16099 if (mp->prev != NULL)
16100 mp->prev->next = mp;
16101 else
16102 minipool_vector_head = mp;
16105 /* Save the new entry. */
16106 max_mp = mp;
16108 /* Scan over the preceding entries and adjust their addresses as
16109 required. */
16110 while (mp->prev != NULL
16111 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16113 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16114 mp = mp->prev;
16117 return max_mp;
16120 /* Add a constant to the minipool for a forward reference. Returns the
16121 node added or NULL if the constant will not fit in this pool. */
16122 static Mnode *
16123 add_minipool_forward_ref (Mfix *fix)
16125 /* If set, max_mp is the first pool_entry that has a lower
16126 constraint than the one we are trying to add. */
16127 Mnode * max_mp = NULL;
16128 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16129 Mnode * mp;
16131 /* If the minipool starts before the end of FIX->INSN then this FIX
16132 can not be placed into the current pool. Furthermore, adding the
16133 new constant pool entry may cause the pool to start FIX_SIZE bytes
16134 earlier. */
16135 if (minipool_vector_head &&
16136 (fix->address + get_attr_length (fix->insn)
16137 >= minipool_vector_head->max_address - fix->fix_size))
16138 return NULL;
16140 /* Scan the pool to see if a constant with the same value has
16141 already been added. While we are doing this, also note the
16142 location where we must insert the constant if it doesn't already
16143 exist. */
16144 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16146 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16147 && fix->mode == mp->mode
16148 && (!LABEL_P (fix->value)
16149 || (CODE_LABEL_NUMBER (fix->value)
16150 == CODE_LABEL_NUMBER (mp->value)))
16151 && rtx_equal_p (fix->value, mp->value))
16153 /* More than one fix references this entry. */
16154 mp->refcount++;
16155 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16158 /* Note the insertion point if necessary. */
16159 if (max_mp == NULL
16160 && mp->max_address > max_address)
16161 max_mp = mp;
16163 /* If we are inserting an 8-byte aligned quantity and
16164 we have not already found an insertion point, then
16165 make sure that all such 8-byte aligned quantities are
16166 placed at the start of the pool. */
16167 if (ARM_DOUBLEWORD_ALIGN
16168 && max_mp == NULL
16169 && fix->fix_size >= 8
16170 && mp->fix_size < 8)
16172 max_mp = mp;
16173 max_address = mp->max_address;
16177 /* The value is not currently in the minipool, so we need to create
16178 a new entry for it. If MAX_MP is NULL, the entry will be put on
16179 the end of the list since the placement is less constrained than
16180 any existing entry. Otherwise, we insert the new fix before
16181 MAX_MP and, if necessary, adjust the constraints on the other
16182 entries. */
16183 mp = XNEW (Mnode);
16184 mp->fix_size = fix->fix_size;
16185 mp->mode = fix->mode;
16186 mp->value = fix->value;
16187 mp->refcount = 1;
16188 /* Not yet required for a backwards ref. */
16189 mp->min_address = -65536;
16191 if (max_mp == NULL)
16193 mp->max_address = max_address;
16194 mp->next = NULL;
16195 mp->prev = minipool_vector_tail;
16197 if (mp->prev == NULL)
16199 minipool_vector_head = mp;
16200 minipool_vector_label = gen_label_rtx ();
16202 else
16203 mp->prev->next = mp;
16205 minipool_vector_tail = mp;
16207 else
16209 if (max_address > max_mp->max_address - mp->fix_size)
16210 mp->max_address = max_mp->max_address - mp->fix_size;
16211 else
16212 mp->max_address = max_address;
16214 mp->next = max_mp;
16215 mp->prev = max_mp->prev;
16216 max_mp->prev = mp;
16217 if (mp->prev != NULL)
16218 mp->prev->next = mp;
16219 else
16220 minipool_vector_head = mp;
16223 /* Save the new entry. */
16224 max_mp = mp;
16226 /* Scan over the preceding entries and adjust their addresses as
16227 required. */
16228 while (mp->prev != NULL
16229 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16231 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16232 mp = mp->prev;
16235 return max_mp;
16238 static Mnode *
16239 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16240 HOST_WIDE_INT min_address)
16242 HOST_WIDE_INT offset;
16244 /* The code below assumes these are different. */
16245 gcc_assert (mp != min_mp);
16247 if (min_mp == NULL)
16249 if (min_address > mp->min_address)
16250 mp->min_address = min_address;
16252 else
16254 /* We will adjust this below if it is too loose. */
16255 mp->min_address = min_address;
16257 /* Unlink MP from its current position. Since min_mp is non-null,
16258 mp->next must be non-null. */
16259 mp->next->prev = mp->prev;
16260 if (mp->prev != NULL)
16261 mp->prev->next = mp->next;
16262 else
16263 minipool_vector_head = mp->next;
16265 /* Reinsert it after MIN_MP. */
16266 mp->prev = min_mp;
16267 mp->next = min_mp->next;
16268 min_mp->next = mp;
16269 if (mp->next != NULL)
16270 mp->next->prev = mp;
16271 else
16272 minipool_vector_tail = mp;
16275 min_mp = mp;
16277 offset = 0;
16278 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16280 mp->offset = offset;
16281 if (mp->refcount > 0)
16282 offset += mp->fix_size;
16284 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16285 mp->next->min_address = mp->min_address + mp->fix_size;
16288 return min_mp;
16291 /* Add a constant to the minipool for a backward reference. Returns the
16292 node added or NULL if the constant will not fit in this pool.
16294 Note that the code for insertion for a backwards reference can be
16295 somewhat confusing because the calculated offsets for each fix do
16296 not take into account the size of the pool (which is still under
16297 construction). */
16298 static Mnode *
16299 add_minipool_backward_ref (Mfix *fix)
16301 /* If set, min_mp is the last pool_entry that has a lower constraint
16302 than the one we are trying to add. */
16303 Mnode *min_mp = NULL;
16304 /* This can be negative, since it is only a constraint. */
16305 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16306 Mnode *mp;
16308 /* If we can't reach the current pool from this insn, or if we can't
16309 insert this entry at the end of the pool without pushing other
16310 fixes out of range, then we don't try. This ensures that we
16311 can't fail later on. */
16312 if (min_address >= minipool_barrier->address
16313 || (minipool_vector_tail->min_address + fix->fix_size
16314 >= minipool_barrier->address))
16315 return NULL;
16317 /* Scan the pool to see if a constant with the same value has
16318 already been added. While we are doing this, also note the
16319 location where we must insert the constant if it doesn't already
16320 exist. */
16321 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16323 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16324 && fix->mode == mp->mode
16325 && (!LABEL_P (fix->value)
16326 || (CODE_LABEL_NUMBER (fix->value)
16327 == CODE_LABEL_NUMBER (mp->value)))
16328 && rtx_equal_p (fix->value, mp->value)
16329 /* Check that there is enough slack to move this entry to the
16330 end of the table (this is conservative). */
16331 && (mp->max_address
16332 > (minipool_barrier->address
16333 + minipool_vector_tail->offset
16334 + minipool_vector_tail->fix_size)))
16336 mp->refcount++;
16337 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16340 if (min_mp != NULL)
16341 mp->min_address += fix->fix_size;
16342 else
16344 /* Note the insertion point if necessary. */
16345 if (mp->min_address < min_address)
16347 /* For now, we do not allow the insertion of 8-byte alignment
16348 requiring nodes anywhere but at the start of the pool. */
16349 if (ARM_DOUBLEWORD_ALIGN
16350 && fix->fix_size >= 8 && mp->fix_size < 8)
16351 return NULL;
16352 else
16353 min_mp = mp;
16355 else if (mp->max_address
16356 < minipool_barrier->address + mp->offset + fix->fix_size)
16358 /* Inserting before this entry would push the fix beyond
16359 its maximum address (which can happen if we have
16360 re-located a forwards fix); force the new fix to come
16361 after it. */
16362 if (ARM_DOUBLEWORD_ALIGN
16363 && fix->fix_size >= 8 && mp->fix_size < 8)
16364 return NULL;
16365 else
16367 min_mp = mp;
16368 min_address = mp->min_address + fix->fix_size;
16371 /* Do not insert a non-8-byte aligned quantity before 8-byte
16372 aligned quantities. */
16373 else if (ARM_DOUBLEWORD_ALIGN
16374 && fix->fix_size < 8
16375 && mp->fix_size >= 8)
16377 min_mp = mp;
16378 min_address = mp->min_address + fix->fix_size;
16383 /* We need to create a new entry. */
16384 mp = XNEW (Mnode);
16385 mp->fix_size = fix->fix_size;
16386 mp->mode = fix->mode;
16387 mp->value = fix->value;
16388 mp->refcount = 1;
16389 mp->max_address = minipool_barrier->address + 65536;
16391 mp->min_address = min_address;
16393 if (min_mp == NULL)
16395 mp->prev = NULL;
16396 mp->next = minipool_vector_head;
16398 if (mp->next == NULL)
16400 minipool_vector_tail = mp;
16401 minipool_vector_label = gen_label_rtx ();
16403 else
16404 mp->next->prev = mp;
16406 minipool_vector_head = mp;
16408 else
16410 mp->next = min_mp->next;
16411 mp->prev = min_mp;
16412 min_mp->next = mp;
16414 if (mp->next != NULL)
16415 mp->next->prev = mp;
16416 else
16417 minipool_vector_tail = mp;
16420 /* Save the new entry. */
16421 min_mp = mp;
16423 if (mp->prev)
16424 mp = mp->prev;
16425 else
16426 mp->offset = 0;
16428 /* Scan over the following entries and adjust their offsets. */
16429 while (mp->next != NULL)
16431 if (mp->next->min_address < mp->min_address + mp->fix_size)
16432 mp->next->min_address = mp->min_address + mp->fix_size;
16434 if (mp->refcount)
16435 mp->next->offset = mp->offset + mp->fix_size;
16436 else
16437 mp->next->offset = mp->offset;
16439 mp = mp->next;
16442 return min_mp;
16445 static void
16446 assign_minipool_offsets (Mfix *barrier)
16448 HOST_WIDE_INT offset = 0;
16449 Mnode *mp;
16451 minipool_barrier = barrier;
16453 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16455 mp->offset = offset;
16457 if (mp->refcount > 0)
16458 offset += mp->fix_size;
16462 /* Output the literal table */
16463 static void
16464 dump_minipool (rtx scan)
16466 Mnode * mp;
16467 Mnode * nmp;
16468 int align64 = 0;
16470 if (ARM_DOUBLEWORD_ALIGN)
16471 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16472 if (mp->refcount > 0 && mp->fix_size >= 8)
16474 align64 = 1;
16475 break;
16478 if (dump_file)
16479 fprintf (dump_file,
16480 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16481 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16483 scan = emit_label_after (gen_label_rtx (), scan);
16484 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16485 scan = emit_label_after (minipool_vector_label, scan);
16487 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16489 if (mp->refcount > 0)
16491 if (dump_file)
16493 fprintf (dump_file,
16494 ";; Offset %u, min %ld, max %ld ",
16495 (unsigned) mp->offset, (unsigned long) mp->min_address,
16496 (unsigned long) mp->max_address);
16497 arm_print_value (dump_file, mp->value);
16498 fputc ('\n', dump_file);
16501 switch (mp->fix_size)
16503 #ifdef HAVE_consttable_1
16504 case 1:
16505 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16506 break;
16508 #endif
16509 #ifdef HAVE_consttable_2
16510 case 2:
16511 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16512 break;
16514 #endif
16515 #ifdef HAVE_consttable_4
16516 case 4:
16517 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16518 break;
16520 #endif
16521 #ifdef HAVE_consttable_8
16522 case 8:
16523 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16524 break;
16526 #endif
16527 #ifdef HAVE_consttable_16
16528 case 16:
16529 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16530 break;
16532 #endif
16533 default:
16534 gcc_unreachable ();
16538 nmp = mp->next;
16539 free (mp);
16542 minipool_vector_head = minipool_vector_tail = NULL;
16543 scan = emit_insn_after (gen_consttable_end (), scan);
16544 scan = emit_barrier_after (scan);
16547 /* Return the cost of forcibly inserting a barrier after INSN. */
16548 static int
16549 arm_barrier_cost (rtx insn)
16551 /* Basing the location of the pool on the loop depth is preferable,
16552 but at the moment, the basic block information seems to be
16553 corrupt by this stage of the compilation. */
16554 int base_cost = 50;
16555 rtx next = next_nonnote_insn (insn);
16557 if (next != NULL && LABEL_P (next))
16558 base_cost -= 20;
16560 switch (GET_CODE (insn))
16562 case CODE_LABEL:
16563 /* It will always be better to place the table before the label, rather
16564 than after it. */
16565 return 50;
16567 case INSN:
16568 case CALL_INSN:
16569 return base_cost;
16571 case JUMP_INSN:
16572 return base_cost - 10;
16574 default:
16575 return base_cost + 10;
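/* So, for instance, a JUMP_INSN that is immediately followed by a
   CODE_LABEL costs 50 - 20 - 10 = 20, making it one of the cheapest
   places at which to force a pool barrier.  */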
16579 /* Find the best place in the insn stream in the range
16580 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16581 Create the barrier by inserting a jump and add a new fix entry for
16582 it. */
16583 static Mfix *
16584 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16586 HOST_WIDE_INT count = 0;
16587 rtx barrier;
16588 rtx from = fix->insn;
16589 /* The instruction after which we will insert the jump. */
16590 rtx selected = NULL;
16591 int selected_cost;
16592 /* The address at which the jump instruction will be placed. */
16593 HOST_WIDE_INT selected_address;
16594 Mfix * new_fix;
16595 HOST_WIDE_INT max_count = max_address - fix->address;
16596 rtx label = gen_label_rtx ();
16598 selected_cost = arm_barrier_cost (from);
16599 selected_address = fix->address;
16601 while (from && count < max_count)
16603 rtx tmp;
16604 int new_cost;
16606 /* This code shouldn't have been called if there was a natural barrier
16607 within range. */
16608 gcc_assert (!BARRIER_P (from));
16610 /* Count the length of this insn. This must stay in sync with the
16611 code that pushes minipool fixes. */
16612 if (LABEL_P (from))
16613 count += get_label_padding (from);
16614 else
16615 count += get_attr_length (from);
16617 /* If there is a jump table, add its length. */
16618 if (tablejump_p (from, NULL, &tmp))
16620 count += get_jump_table_size (tmp);
16622 /* Jump tables aren't in a basic block, so base the cost on
16623 the dispatch insn. If we select this location, we will
16624 still put the pool after the table. */
16625 new_cost = arm_barrier_cost (from);
16627 if (count < max_count
16628 && (!selected || new_cost <= selected_cost))
16630 selected = tmp;
16631 selected_cost = new_cost;
16632 selected_address = fix->address + count;
16635 /* Continue after the dispatch table. */
16636 from = NEXT_INSN (tmp);
16637 continue;
16640 new_cost = arm_barrier_cost (from);
16642 if (count < max_count
16643 && (!selected || new_cost <= selected_cost))
16645 selected = from;
16646 selected_cost = new_cost;
16647 selected_address = fix->address + count;
16650 from = NEXT_INSN (from);
16653 /* Make sure that we found a place to insert the jump. */
16654 gcc_assert (selected);
16656 /* Make sure we do not split a call and its corresponding
16657 CALL_ARG_LOCATION note. */
16658 if (CALL_P (selected))
16660 rtx next = NEXT_INSN (selected);
16661 if (next && NOTE_P (next)
16662 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16663 selected = next;
16666 /* Create a new JUMP_INSN that branches around a barrier. */
16667 from = emit_jump_insn_after (gen_jump (label), selected);
16668 JUMP_LABEL (from) = label;
16669 barrier = emit_barrier_after (from);
16670 emit_label_after (label, barrier);
16672 /* Create a minipool barrier entry for the new barrier. */
16673 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16674 new_fix->insn = barrier;
16675 new_fix->address = selected_address;
16676 new_fix->next = fix->next;
16677 fix->next = new_fix;
16679 return new_fix;
16682 /* Record that there is a natural barrier in the insn stream at
16683 ADDRESS. */
16684 static void
16685 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16687 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16689 fix->insn = insn;
16690 fix->address = address;
16692 fix->next = NULL;
16693 if (minipool_fix_head != NULL)
16694 minipool_fix_tail->next = fix;
16695 else
16696 minipool_fix_head = fix;
16698 minipool_fix_tail = fix;
16701 /* Record INSN, which will need fixing up to load a value from the
16702 minipool. ADDRESS is the offset of the insn since the start of the
16703 function; LOC is a pointer to the part of the insn which requires
16704 fixing; VALUE is the constant that must be loaded, which is of type
16705 MODE. */
16706 static void
16707 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16708 enum machine_mode mode, rtx value)
16710 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16712 fix->insn = insn;
16713 fix->address = address;
16714 fix->loc = loc;
16715 fix->mode = mode;
16716 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16717 fix->value = value;
16718 fix->forwards = get_attr_pool_range (insn);
16719 fix->backwards = get_attr_neg_pool_range (insn);
16720 fix->minipool = NULL;
16722 /* If an insn doesn't have a range defined for it, then it isn't
16723 expecting to be reworked by this code. Better to stop now than
16724 to generate duff assembly code. */
16725 gcc_assert (fix->forwards || fix->backwards);
16727 /* If an entry requires 8-byte alignment then assume all constant pools
16728 require 4 bytes of padding. Trying to do this later on a per-pool
16729 basis is awkward because existing pool entries have to be modified. */
16730 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16731 minipool_pad = 4;
16733 if (dump_file)
16735 fprintf (dump_file,
16736 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16737 GET_MODE_NAME (mode),
16738 INSN_UID (insn), (unsigned long) address,
16739 -1 * (long)fix->backwards, (long)fix->forwards);
16740 arm_print_value (dump_file, fix->value);
16741 fprintf (dump_file, "\n");
16744 /* Add it to the chain of fixes. */
16745 fix->next = NULL;
16747 if (minipool_fix_head != NULL)
16748 minipool_fix_tail->next = fix;
16749 else
16750 minipool_fix_head = fix;
16752 minipool_fix_tail = fix;
16755 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16756 Returns the number of insns needed, or 99 if we always want to synthesize
16757 the value. */
16758 int
16759 arm_max_const_double_inline_cost ()
16761 /* Let the value get synthesized to avoid the use of literal pools. */
16762 if (arm_disable_literal_pool)
16763 return 99;
16765 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16768 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16769 Returns the number of insns needed, or 99 if we don't know how to
16770 do it. */
16771 int
16772 arm_const_double_inline_cost (rtx val)
16774 rtx lowpart, highpart;
16775 enum machine_mode mode;
16777 mode = GET_MODE (val);
16779 if (mode == VOIDmode)
16780 mode = DImode;
16782 gcc_assert (GET_MODE_SIZE (mode) == 8);
16784 lowpart = gen_lowpart (SImode, val);
16785 highpart = gen_highpart_mode (SImode, mode, val);
16787 gcc_assert (CONST_INT_P (lowpart));
16788 gcc_assert (CONST_INT_P (highpart));
16790 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16791 NULL_RTX, NULL_RTX, 0, 0)
16792 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16793 NULL_RTX, NULL_RTX, 0, 0));
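/* Illustration: for the DImode constant 0x0000000100000001 each 32-bit
   half is 1, a valid immediate, so the cost is 1 + 1 = 2 insns; that is
   below the limit returned by arm_max_const_double_inline_cost, so such
   a constant is synthesized inline rather than placed in the pool.  */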
16796 /* Return true if it is worthwhile to split a 64-bit constant into two
16797 32-bit operations. This is the case if optimizing for size, or
16798 if we have load delay slots, or if one 32-bit part can be done with
16799 a single data operation. */
16800 bool
16801 arm_const_double_by_parts (rtx val)
16803 enum machine_mode mode = GET_MODE (val);
16804 rtx part;
16806 if (optimize_size || arm_ld_sched)
16807 return true;
16809 if (mode == VOIDmode)
16810 mode = DImode;
16812 part = gen_highpart_mode (SImode, mode, val);
16814 gcc_assert (CONST_INT_P (part));
16816 if (const_ok_for_arm (INTVAL (part))
16817 || const_ok_for_arm (~INTVAL (part)))
16818 return true;
16820 part = gen_lowpart (SImode, val);
16822 gcc_assert (CONST_INT_P (part));
16824 if (const_ok_for_arm (INTVAL (part))
16825 || const_ok_for_arm (~INTVAL (part)))
16826 return true;
16828 return false;
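/* For example, a 64-bit value whose high word is 0xff000000 is worth
   building by parts even without load delay slots, because 0xff000000
   is a rotated 8-bit immediate and that half costs a single MOV.  */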
16831 /* Return true if it is possible to inline both the high and low parts
16832 of a 64-bit constant into 32-bit data processing instructions. */
16833 bool
16834 arm_const_double_by_immediates (rtx val)
16836 enum machine_mode mode = GET_MODE (val);
16837 rtx part;
16839 if (mode == VOIDmode)
16840 mode = DImode;
16842 part = gen_highpart_mode (SImode, mode, val);
16844 gcc_assert (CONST_INT_P (part));
16846 if (!const_ok_for_arm (INTVAL (part)))
16847 return false;
16849 part = gen_lowpart (SImode, val);
16851 gcc_assert (CONST_INT_P (part));
16853 if (!const_ok_for_arm (INTVAL (part)))
16854 return false;
16856 return true;
16859 /* Scan INSN and note any of its operands that need fixing.
16860 If DO_PUSHES is false we do not actually push any of the fixups
16861 needed. */
16862 static void
16863 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16865 int opno;
16867 extract_insn (insn);
16869 if (!constrain_operands (1))
16870 fatal_insn_not_found (insn);
16872 if (recog_data.n_alternatives == 0)
16873 return;
16875 /* Fill in recog_op_alt with information about the constraints of
16876 this insn. */
16877 preprocess_constraints ();
16879 for (opno = 0; opno < recog_data.n_operands; opno++)
16881 /* Things we need to fix can only occur in inputs. */
16882 if (recog_data.operand_type[opno] != OP_IN)
16883 continue;
16885 /* If this alternative is a memory reference, then any mention
16886 of constants in this alternative is really to fool reload
16887 into allowing us to accept one there. We need to fix them up
16888 now so that we output the right code. */
16889 if (recog_op_alt[opno][which_alternative].memory_ok)
16891 rtx op = recog_data.operand[opno];
16893 if (CONSTANT_P (op))
16895 if (do_pushes)
16896 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16897 recog_data.operand_mode[opno], op);
16899 else if (MEM_P (op)
16900 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16901 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16903 if (do_pushes)
16905 rtx cop = avoid_constant_pool_reference (op);
16907 /* Casting the address of something to a mode narrower
16908 than a word can cause avoid_constant_pool_reference()
16909 to return the pool reference itself. That's no good to
16910 us here. Let's just hope that we can use the
16911 constant pool value directly. */
16912 if (op == cop)
16913 cop = get_pool_constant (XEXP (op, 0));
16915 push_minipool_fix (insn, address,
16916 recog_data.operand_loc[opno],
16917 recog_data.operand_mode[opno], cop);
16924 return;
16927 /* Rewrite move insn into subtract of 0 if the condition codes will
16928 be useful in next conditional jump insn. */
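/* A sketch of the intended transformation (illustrative Thumb-1 code):

	mov	r2, r3		@ move feeding a later compare with zero
	cmp	r2, #0
	beq	.L1

   becomes

	subs	r2, r3, #0	@ flag-setting, same value
	beq	.L1

   since the subtract already sets the condition codes the branch needs,
   the explicit compare can be omitted.  */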
16930 static void
16931 thumb1_reorg (void)
16933 basic_block bb;
16935 FOR_EACH_BB_FN (bb, cfun)
16937 rtx dest, src;
16938 rtx pat, op0, set = NULL;
16939 rtx prev, insn = BB_END (bb);
16940 bool insn_clobbered = false;
16942 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16943 insn = PREV_INSN (insn);
16945 /* Find the last cbranchsi4_insn in basic block BB. */
16946 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16947 continue;
16949 /* Get the register with which we are comparing. */
16950 pat = PATTERN (insn);
16951 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16953 /* Find the first flag setting insn before INSN in basic block BB. */
16954 gcc_assert (insn != BB_HEAD (bb));
16955 for (prev = PREV_INSN (insn);
16956 (!insn_clobbered
16957 && prev != BB_HEAD (bb)
16958 && (NOTE_P (prev)
16959 || DEBUG_INSN_P (prev)
16960 || ((set = single_set (prev)) != NULL
16961 && get_attr_conds (prev) == CONDS_NOCOND)));
16962 prev = PREV_INSN (prev))
16964 if (reg_set_p (op0, prev))
16965 insn_clobbered = true;
16968 /* Skip if op0 is clobbered by insn other than prev. */
16969 if (insn_clobbered)
16970 continue;
16972 if (!set)
16973 continue;
16975 dest = SET_DEST (set);
16976 src = SET_SRC (set);
16977 if (!low_register_operand (dest, SImode)
16978 || !low_register_operand (src, SImode))
16979 continue;
16981 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16982 in INSN. Both src and dest of the move insn are checked. */
16983 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16985 dest = copy_rtx (dest);
16986 src = copy_rtx (src);
16987 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16988 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16989 INSN_CODE (prev) = -1;
16990 /* Set test register in INSN to dest. */
16991 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16992 INSN_CODE (insn) = -1;
16997 /* Convert instructions to their cc-clobbering variant if possible, since
16998 that allows us to use smaller encodings. */
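/* A sketch of the effect (illustrative): when the condition codes are
   dead after the insn,

	add	r0, r1, r2	@ 32-bit add.w, flags untouched

   can be re-emitted as

	adds	r0, r1, r2	@ 16-bit encoding, clobbers the flags

   which is what the CONV/SWAP_CONV actions below arrange by attaching a
   clobber of CC_REGNUM to the pattern.  */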
17000 static void
17001 thumb2_reorg (void)
17003 basic_block bb;
17004 regset_head live;
17006 INIT_REG_SET (&live);
17008 /* We are freeing block_for_insn in the toplev to keep compatibility
17009 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17010 compute_bb_for_insn ();
17011 df_analyze ();
17013 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17015 FOR_EACH_BB_FN (bb, cfun)
17017 if (current_tune->disparage_flag_setting_t16_encodings
17018 && optimize_bb_for_speed_p (bb))
17019 continue;
17021 rtx insn;
17022 Convert_Action action = SKIP;
17023 Convert_Action action_for_partial_flag_setting
17024 = (current_tune->disparage_partial_flag_setting_t16_encodings
17025 && optimize_bb_for_speed_p (bb))
17026 ? SKIP : CONV;
17028 COPY_REG_SET (&live, DF_LR_OUT (bb));
17029 df_simulate_initialize_backwards (bb, &live);
17030 FOR_BB_INSNS_REVERSE (bb, insn)
17032 if (NONJUMP_INSN_P (insn)
17033 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17034 && GET_CODE (PATTERN (insn)) == SET)
17036 action = SKIP;
17037 rtx pat = PATTERN (insn);
17038 rtx dst = XEXP (pat, 0);
17039 rtx src = XEXP (pat, 1);
17040 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17042 if (!OBJECT_P (src))
17043 op0 = XEXP (src, 0);
17045 if (BINARY_P (src))
17046 op1 = XEXP (src, 1);
17048 if (low_register_operand (dst, SImode))
17050 switch (GET_CODE (src))
17052 case PLUS:
17053 /* Adding two registers and storing the result
17054 in the first source is already a 16-bit
17055 operation. */
17056 if (rtx_equal_p (dst, op0)
17057 && register_operand (op1, SImode))
17058 break;
17060 if (low_register_operand (op0, SImode))
17062 /* ADDS <Rd>,<Rn>,<Rm> */
17063 if (low_register_operand (op1, SImode))
17064 action = CONV;
17065 /* ADDS <Rdn>,#<imm8> */
17066 /* SUBS <Rdn>,#<imm8> */
17067 else if (rtx_equal_p (dst, op0)
17068 && CONST_INT_P (op1)
17069 && IN_RANGE (INTVAL (op1), -255, 255))
17070 action = CONV;
17071 /* ADDS <Rd>,<Rn>,#<imm3> */
17072 /* SUBS <Rd>,<Rn>,#<imm3> */
17073 else if (CONST_INT_P (op1)
17074 && IN_RANGE (INTVAL (op1), -7, 7))
17075 action = CONV;
17077 /* ADCS <Rd>, <Rn> */
17078 else if (GET_CODE (XEXP (src, 0)) == PLUS
17079 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17080 && low_register_operand (XEXP (XEXP (src, 0), 1),
17081 SImode)
17082 && COMPARISON_P (op1)
17083 && cc_register (XEXP (op1, 0), VOIDmode)
17084 && maybe_get_arm_condition_code (op1) == ARM_CS
17085 && XEXP (op1, 1) == const0_rtx)
17086 action = CONV;
17087 break;
17089 case MINUS:
17090 /* RSBS <Rd>,<Rn>,#0
17091 Not handled here: see NEG below. */
17092 /* SUBS <Rd>,<Rn>,#<imm3>
17093 SUBS <Rdn>,#<imm8>
17094 Not handled here: see PLUS above. */
17095 /* SUBS <Rd>,<Rn>,<Rm> */
17096 if (low_register_operand (op0, SImode)
17097 && low_register_operand (op1, SImode))
17098 action = CONV;
17099 break;
17101 case MULT:
17102 /* MULS <Rdm>,<Rn>,<Rdm>
17103 As an exception to the rule, this is only used
17104 when optimizing for size since MULS is slow on all
17105 known implementations. We do not even want to use
17106 MULS in cold code, if optimizing for speed, so we
17107 test the global flag here. */
17108 if (!optimize_size)
17109 break;
17110 /* else fall through. */
17111 case AND:
17112 case IOR:
17113 case XOR:
17114 /* ANDS <Rdn>,<Rm> */
17115 if (rtx_equal_p (dst, op0)
17116 && low_register_operand (op1, SImode))
17117 action = action_for_partial_flag_setting;
17118 else if (rtx_equal_p (dst, op1)
17119 && low_register_operand (op0, SImode))
17120 action = action_for_partial_flag_setting == SKIP
17121 ? SKIP : SWAP_CONV;
17122 break;
17124 case ASHIFTRT:
17125 case ASHIFT:
17126 case LSHIFTRT:
17127 /* ASRS <Rdn>,<Rm> */
17128 /* LSRS <Rdn>,<Rm> */
17129 /* LSLS <Rdn>,<Rm> */
17130 if (rtx_equal_p (dst, op0)
17131 && low_register_operand (op1, SImode))
17132 action = action_for_partial_flag_setting;
17133 /* ASRS <Rd>,<Rm>,#<imm5> */
17134 /* LSRS <Rd>,<Rm>,#<imm5> */
17135 /* LSLS <Rd>,<Rm>,#<imm5> */
17136 else if (low_register_operand (op0, SImode)
17137 && CONST_INT_P (op1)
17138 && IN_RANGE (INTVAL (op1), 0, 31))
17139 action = action_for_partial_flag_setting;
17140 break;
17142 case ROTATERT:
17143 /* RORS <Rdn>,<Rm> */
17144 if (rtx_equal_p (dst, op0)
17145 && low_register_operand (op1, SImode))
17146 action = action_for_partial_flag_setting;
17147 break;
17149 case NOT:
17150 /* MVNS <Rd>,<Rm> */
17151 if (low_register_operand (op0, SImode))
17152 action = action_for_partial_flag_setting;
17153 break;
17155 case NEG:
17156 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17157 if (low_register_operand (op0, SImode))
17158 action = CONV;
17159 break;
17161 case CONST_INT:
17162 /* MOVS <Rd>,#<imm8> */
17163 if (CONST_INT_P (src)
17164 && IN_RANGE (INTVAL (src), 0, 255))
17165 action = action_for_partial_flag_setting;
17166 break;
17168 case REG:
17169 /* MOVS and MOV<c> with registers have different
17170 encodings, so are not relevant here. */
17171 break;
17173 default:
17174 break;
17178 if (action != SKIP)
17180 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17181 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17182 rtvec vec;
17184 if (action == SWAP_CONV)
17186 src = copy_rtx (src);
17187 XEXP (src, 0) = op1;
17188 XEXP (src, 1) = op0;
17189 pat = gen_rtx_SET (VOIDmode, dst, src);
17190 vec = gen_rtvec (2, pat, clobber);
17192 else /* action == CONV */
17193 vec = gen_rtvec (2, pat, clobber);
17195 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17196 INSN_CODE (insn) = -1;
17200 if (NONDEBUG_INSN_P (insn))
17201 df_simulate_one_insn_backwards (bb, insn, &live);
17205 CLEAR_REG_SET (&live);
17208 /* Gcc puts the pool in the wrong place for ARM, since we can only
17209 load addresses a limited distance around the pc. We do some
17210 special munging to move the constant pool values to the correct
17211 point in the code. */
17212 static void
17213 arm_reorg (void)
17215 rtx insn;
17216 HOST_WIDE_INT address = 0;
17217 Mfix * fix;
17219 if (TARGET_THUMB1)
17220 thumb1_reorg ();
17221 else if (TARGET_THUMB2)
17222 thumb2_reorg ();
17224 /* Ensure all insns that must be split have been split at this point.
17225 Otherwise, the pool placement code below may compute incorrect
17226 insn lengths. Note that when optimizing, all insns have already
17227 been split at this point. */
17228 if (!optimize)
17229 split_all_insns_noflow ();
17231 minipool_fix_head = minipool_fix_tail = NULL;
17233 /* The first insn must always be a note, or the code below won't
17234 scan it properly. */
17235 insn = get_insns ();
17236 gcc_assert (NOTE_P (insn));
17237 minipool_pad = 0;
17239 /* Scan all the insns and record the operands that will need fixing. */
17240 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17242 if (BARRIER_P (insn))
17243 push_minipool_barrier (insn, address);
17244 else if (INSN_P (insn))
17246 rtx table;
17248 note_invalid_constants (insn, address, true);
17249 address += get_attr_length (insn);
17251 /* If the insn is a vector jump, add the size of the table
17252 and skip the table. */
17253 if (tablejump_p (insn, NULL, &table))
17255 address += get_jump_table_size (table);
17256 insn = table;
17259 else if (LABEL_P (insn))
17260 /* Add the worst-case padding due to alignment. We don't add
17261 the _current_ padding because the minipool insertions
17262 themselves might change it. */
17263 address += get_label_padding (insn);
17266 fix = minipool_fix_head;
17268 /* Now scan the fixups and perform the required changes. */
17269 while (fix)
17271 Mfix * ftmp;
17272 Mfix * fdel;
17273 Mfix * last_added_fix;
17274 Mfix * last_barrier = NULL;
17275 Mfix * this_fix;
17277 /* Skip any further barriers before the next fix. */
17278 while (fix && BARRIER_P (fix->insn))
17279 fix = fix->next;
17281 /* No more fixes. */
17282 if (fix == NULL)
17283 break;
17285 last_added_fix = NULL;
17287 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17289 if (BARRIER_P (ftmp->insn))
17291 if (ftmp->address >= minipool_vector_head->max_address)
17292 break;
17294 last_barrier = ftmp;
17296 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17297 break;
17299 last_added_fix = ftmp; /* Keep track of the last fix added. */
17302 /* If we found a barrier, drop back to that; any fixes that we
17303 could have reached but come after the barrier will now go in
17304 the next mini-pool. */
17305 if (last_barrier != NULL)
17307 /* Reduce the refcount for those fixes that won't go into this
17308 pool after all. */
17309 for (fdel = last_barrier->next;
17310 fdel && fdel != ftmp;
17311 fdel = fdel->next)
17313 fdel->minipool->refcount--;
17314 fdel->minipool = NULL;
17317 ftmp = last_barrier;
17319 else
17321 /* ftmp is the first fix that we can't fit into this pool and
17322 there are no natural barriers that we could use. Insert a
17323 new barrier in the code somewhere between the previous
17324 fix and this one, and arrange to jump around it. */
17325 HOST_WIDE_INT max_address;
17327 /* The last item on the list of fixes must be a barrier, so
17328 we can never run off the end of the list of fixes without
17329 last_barrier being set. */
17330 gcc_assert (ftmp);
17332 max_address = minipool_vector_head->max_address;
17333 /* Check that there isn't another fix that is in range that
17334 we couldn't fit into this pool because the pool was
17335 already too large: we need to put the pool before such an
17336 instruction. The pool itself may come just after the
17337 fix because create_fix_barrier also allows space for a
17338 jump instruction. */
17339 if (ftmp->address < max_address)
17340 max_address = ftmp->address + 1;
17342 last_barrier = create_fix_barrier (last_added_fix, max_address);
17345 assign_minipool_offsets (last_barrier);
17347 while (ftmp)
17349 if (!BARRIER_P (ftmp->insn)
17350 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17351 == NULL))
17352 break;
17354 ftmp = ftmp->next;
17357 /* Scan over the fixes we have identified for this pool, fixing them
17358 up and adding the constants to the pool itself. */
17359 for (this_fix = fix; this_fix && ftmp != this_fix;
17360 this_fix = this_fix->next)
17361 if (!BARRIER_P (this_fix->insn))
17363 rtx addr
17364 = plus_constant (Pmode,
17365 gen_rtx_LABEL_REF (VOIDmode,
17366 minipool_vector_label),
17367 this_fix->minipool->offset);
17368 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17371 dump_minipool (last_barrier->insn);
17372 fix = ftmp;
17375 /* From now on we must synthesize any constants that we can't handle
17376 directly. This can happen if the RTL gets split during final
17377 instruction generation. */
17378 cfun->machine->after_arm_reorg = 1;
17380 /* Free the minipool memory. */
17381 obstack_free (&minipool_obstack, minipool_startobj);
17384 /* Routines to output assembly language. */
17386 /* If the rtx is the correct value then return the string of the number.
17387 In this way we can ensure that valid double constants are generated even
17388 when cross compiling. */
17389 const char *
17390 fp_immediate_constant (rtx x)
17392 REAL_VALUE_TYPE r;
17394 if (!fp_consts_inited)
17395 init_fp_table ();
17397 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17399 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17400 return "0";
17403 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17404 static const char *
17405 fp_const_from_val (REAL_VALUE_TYPE *r)
17407 if (!fp_consts_inited)
17408 init_fp_table ();
17410 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17411 return "0";
17414 /* OPERANDS[0] is the entire list of insns that constitute pop,
17415 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17416 is in the list, UPDATE is true iff the list contains explicit
17417 update of base register. */
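/* For example (illustrative): popping { r4, r5, pc } with SP as the base
   register and an explicit update produces "pop {r4, r5, pc}" under
   unified asm, while the same list with r7 as the base produces
   "ldmia r7!, {r4, r5, pc}".  */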
17418 void
17419 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17420 bool update)
17422 int i;
17423 char pattern[100];
17424 int offset;
17425 const char *conditional;
17426 int num_saves = XVECLEN (operands[0], 0);
17427 unsigned int regno;
17428 unsigned int regno_base = REGNO (operands[1]);
17430 offset = 0;
17431 offset += update ? 1 : 0;
17432 offset += return_pc ? 1 : 0;
17434 /* Is the base register in the list? */
17435 for (i = offset; i < num_saves; i++)
17437 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17438 /* If SP is in the list, then the base register must be SP. */
17439 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17440 /* If base register is in the list, there must be no explicit update. */
17441 if (regno == regno_base)
17442 gcc_assert (!update);
17445 conditional = reverse ? "%?%D0" : "%?%d0";
17446 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17448 /* Output pop (not ldmfd) because it has a shorter encoding. */
17449 gcc_assert (update);
17450 sprintf (pattern, "pop%s\t{", conditional);
17452 else
17454 /* Output ldmfd when the base register is SP; otherwise output ldmia.
17455 It's just a convention; their semantics are identical. */
17456 if (regno_base == SP_REGNUM)
17457 sprintf (pattern, "ldm%sfd\t", conditional);
17458 else if (TARGET_UNIFIED_ASM)
17459 sprintf (pattern, "ldmia%s\t", conditional);
17460 else
17461 sprintf (pattern, "ldm%sia\t", conditional);
17463 strcat (pattern, reg_names[regno_base]);
17464 if (update)
17465 strcat (pattern, "!, {");
17466 else
17467 strcat (pattern, ", {");
17470 /* Output the first destination register. */
17471 strcat (pattern,
17472 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17474 /* Output the rest of the destination registers. */
17475 for (i = offset + 1; i < num_saves; i++)
17477 strcat (pattern, ", ");
17478 strcat (pattern,
17479 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17482 strcat (pattern, "}");
17484 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17485 strcat (pattern, "^");
17487 output_asm_insn (pattern, &cond);
17491 /* Output the assembly for a store multiple. */
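/* For example (illustrative, assuming the address operand is based on sp):
   a push of three D registers starting at d8 is printed as
   "fstmfdd sp!, {d8, d9, d10}", built from the template below plus one
   ", d%d" per additional register.  */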
17493 const char *
17494 vfp_output_fstmd (rtx * operands)
17496 char pattern[100];
17497 int p;
17498 int base;
17499 int i;
17501 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17502 p = strlen (pattern);
17504 gcc_assert (REG_P (operands[1]));
17506 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17507 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17509 p += sprintf (&pattern[p], ", d%d", base + i);
17511 strcpy (&pattern[p], "}");
17513 output_asm_insn (pattern, operands);
17514 return "";
17518 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17519 number of bytes pushed. */
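/* A sketch of the effect: a call with COUNT == 4 emits one store-multiple
   that pushes four D registers, attaches the corresponding DWARF frame
   notes, and returns 32 (4 * 8 bytes).  Pre-v6 cores bump COUNT == 2 up
   to 3 to dodge the erratum described below.  */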
17521 static int
17522 vfp_emit_fstmd (int base_reg, int count)
17524 rtx par;
17525 rtx dwarf;
17526 rtx tmp, reg;
17527 int i;
17529 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17530 register pairs are stored by a store multiple insn. We avoid this
17531 by pushing an extra pair. */
17532 if (count == 2 && !arm_arch6)
17534 if (base_reg == LAST_VFP_REGNUM - 3)
17535 base_reg -= 2;
17536 count++;
17539 /* FSTMD may not store more than 16 doubleword registers at once. Split
17540 larger stores into multiple parts (up to a maximum of two, in
17541 practice). */
17542 if (count > 16)
17544 int saved;
17545 /* NOTE: base_reg is an internal register number, so each D register
17546 counts as 2. */
17547 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17548 saved += vfp_emit_fstmd (base_reg, 16);
17549 return saved;
17552 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17553 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17555 reg = gen_rtx_REG (DFmode, base_reg);
17556 base_reg += 2;
17558 XVECEXP (par, 0, 0)
17559 = gen_rtx_SET (VOIDmode,
17560 gen_frame_mem
17561 (BLKmode,
17562 gen_rtx_PRE_MODIFY (Pmode,
17563 stack_pointer_rtx,
17564 plus_constant
17565 (Pmode, stack_pointer_rtx,
17566 - (count * 8)))
17568 gen_rtx_UNSPEC (BLKmode,
17569 gen_rtvec (1, reg),
17570 UNSPEC_PUSH_MULT));
17572 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17573 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17574 RTX_FRAME_RELATED_P (tmp) = 1;
17575 XVECEXP (dwarf, 0, 0) = tmp;
17577 tmp = gen_rtx_SET (VOIDmode,
17578 gen_frame_mem (DFmode, stack_pointer_rtx),
17579 reg);
17580 RTX_FRAME_RELATED_P (tmp) = 1;
17581 XVECEXP (dwarf, 0, 1) = tmp;
17583 for (i = 1; i < count; i++)
17585 reg = gen_rtx_REG (DFmode, base_reg);
17586 base_reg += 2;
17587 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17589 tmp = gen_rtx_SET (VOIDmode,
17590 gen_frame_mem (DFmode,
17591 plus_constant (Pmode,
17592 stack_pointer_rtx,
17593 i * 8)),
17594 reg);
17595 RTX_FRAME_RELATED_P (tmp) = 1;
17596 XVECEXP (dwarf, 0, i + 1) = tmp;
17599 par = emit_insn (par);
17600 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17601 RTX_FRAME_RELATED_P (par) = 1;
17603 return count * 8;
17606 /* Emit a call instruction with pattern PAT. ADDR is the address of
17607 the call target. */
17609 void
17610 arm_emit_call_insn (rtx pat, rtx addr)
17612 rtx insn;
17614 insn = emit_call_insn (pat);
17616 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17617 If the call might use such an entry, add a use of the PIC register
17618 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17619 if (TARGET_VXWORKS_RTP
17620 && flag_pic
17621 && GET_CODE (addr) == SYMBOL_REF
17622 && (SYMBOL_REF_DECL (addr)
17623 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17624 : !SYMBOL_REF_LOCAL_P (addr)))
17626 require_pic_register ();
17627 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17631 /* Output a 'call' insn. */
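/* Illustrative output for a pre-v5 call through r3 on an arch4t or
   interworking target:
	mov	lr, pc
	bx	r3
   Without bx available, the final instruction becomes "mov pc, r3".  */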
17632 const char *
17633 output_call (rtx *operands)
17635 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17637 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17638 if (REGNO (operands[0]) == LR_REGNUM)
17640 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17641 output_asm_insn ("mov%?\t%0, %|lr", operands);
17644 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17646 if (TARGET_INTERWORK || arm_arch4t)
17647 output_asm_insn ("bx%?\t%0", operands);
17648 else
17649 output_asm_insn ("mov%?\t%|pc, %0", operands);
17651 return "";
17654 /* Output a 'call' insn that is a reference in memory. This is
17655 disabled for ARMv5 and we prefer a blx instead because otherwise
17656 there's a significant performance overhead. */
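/* A rough sketch of the emitted sequences (operands illustrative): with
   interworking, "ldr ip, <mem>; mov lr, pc; bx ip"; otherwise, when LR is
   not part of the address, simply "mov lr, pc; ldr pc, <mem>".  */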
17657 const char *
17658 output_call_mem (rtx *operands)
17660 gcc_assert (!arm_arch5);
17661 if (TARGET_INTERWORK)
17663 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17664 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17665 output_asm_insn ("bx%?\t%|ip", operands);
17667 else if (regno_use_in (LR_REGNUM, operands[0]))
17669 /* LR is used in the memory address. We load the address in the
17670 first instruction. It's safe to use IP as the target of the
17671 load since the call will kill it anyway. */
17672 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17673 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17674 if (arm_arch4t)
17675 output_asm_insn ("bx%?\t%|ip", operands);
17676 else
17677 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17679 else
17681 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17682 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17685 return "";
17689 /* Output a move from arm registers to arm registers of a long double
17690 OPERANDS[0] is the destination.
17691 OPERANDS[1] is the source. */
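/* Worked example (hypothetical registers): copying r1-r3 into r0-r2 emits
   "mov r0, r1; mov r1, r2; mov r2, r3" in ascending order, so an
   overlapping source register is always read before it is overwritten;
   the opposite overlap direction uses descending order instead.  */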
17692 const char *
17693 output_mov_long_double_arm_from_arm (rtx *operands)
17695 /* We have to be careful here because the two might overlap. */
17696 int dest_start = REGNO (operands[0]);
17697 int src_start = REGNO (operands[1]);
17698 rtx ops[2];
17699 int i;
17701 if (dest_start < src_start)
17703 for (i = 0; i < 3; i++)
17705 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17706 ops[1] = gen_rtx_REG (SImode, src_start + i);
17707 output_asm_insn ("mov%?\t%0, %1", ops);
17710 else
17712 for (i = 2; i >= 0; i--)
17714 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17715 ops[1] = gen_rtx_REG (SImode, src_start + i);
17716 output_asm_insn ("mov%?\t%0, %1", ops);
17720 return "";
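/* Emit a pair of instructions that load SRC into DEST 16 bits at a time
   (typically assembling to a movw/movt pair).  For a constant, the low
   half is written first and the high half is inserted only if it is
   non-zero; for a symbolic SRC a HIGH/LO_SUM pair is emitted instead.  */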
17723 void
17724 arm_emit_movpair (rtx dest, rtx src)
17726 /* If the src is an immediate, simplify it. */
17727 if (CONST_INT_P (src))
17729 HOST_WIDE_INT val = INTVAL (src);
17730 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17731 if ((val >> 16) & 0x0000ffff)
17732 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17733 GEN_INT (16)),
17734 GEN_INT ((val >> 16) & 0x0000ffff));
17735 return;
17737 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17738 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17741 /* Output a move between double words. It must be REG<-MEM
17742 or MEM<-REG. */
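/* For example (illustrative operands): a DImode load from a plain register
   address becomes "ldrd r0, [r4]" when LDRD is available, or
   "ldmia r4, {r0, r1}" otherwise; the store direction mirrors this with
   strd/stmia.  */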
17743 const char *
17744 output_move_double (rtx *operands, bool emit, int *count)
17746 enum rtx_code code0 = GET_CODE (operands[0]);
17747 enum rtx_code code1 = GET_CODE (operands[1]);
17748 rtx otherops[3];
17749 if (count)
17750 *count = 1;
17752 /* The only case when this might happen is when
17753 you are looking at the length of a DImode instruction
17754 that has an invalid constant in it. */
17755 if (code0 == REG && code1 != MEM)
17757 gcc_assert (!emit);
17758 *count = 2;
17759 return "";
17762 if (code0 == REG)
17764 unsigned int reg0 = REGNO (operands[0]);
17766 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17768 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17770 switch (GET_CODE (XEXP (operands[1], 0)))
17772 case REG:
17774 if (emit)
17776 if (TARGET_LDRD
17777 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17778 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17779 else
17780 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17782 break;
17784 case PRE_INC:
17785 gcc_assert (TARGET_LDRD);
17786 if (emit)
17787 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17788 break;
17790 case PRE_DEC:
17791 if (emit)
17793 if (TARGET_LDRD)
17794 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17795 else
17796 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17798 break;
17800 case POST_INC:
17801 if (emit)
17803 if (TARGET_LDRD)
17804 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17805 else
17806 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17808 break;
17810 case POST_DEC:
17811 gcc_assert (TARGET_LDRD);
17812 if (emit)
17813 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17814 break;
17816 case PRE_MODIFY:
17817 case POST_MODIFY:
17818 /* Autoincrement addressing modes should never have overlapping
17819 base and destination registers, and overlapping index registers
17820 are already prohibited, so this doesn't need to worry about
17821 fix_cm3_ldrd. */
17822 otherops[0] = operands[0];
17823 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17824 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17826 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17828 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17830 /* Registers overlap so split out the increment. */
17831 if (emit)
17833 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17834 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17836 if (count)
17837 *count = 2;
17839 else
17841 /* Use a single insn if we can.
17842 FIXME: IWMMXT allows offsets larger than ldrd can
17843 handle, fix these up with a pair of ldr. */
17844 if (TARGET_THUMB2
17845 || !CONST_INT_P (otherops[2])
17846 || (INTVAL (otherops[2]) > -256
17847 && INTVAL (otherops[2]) < 256))
17849 if (emit)
17850 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17852 else
17854 if (emit)
17856 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17857 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17859 if (count)
17860 *count = 2;
17865 else
17867 /* Use a single insn if we can.
17868 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17869 fix these up with a pair of ldr. */
17870 if (TARGET_THUMB2
17871 || !CONST_INT_P (otherops[2])
17872 || (INTVAL (otherops[2]) > -256
17873 && INTVAL (otherops[2]) < 256))
17875 if (emit)
17876 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17878 else
17880 if (emit)
17882 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17883 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17885 if (count)
17886 *count = 2;
17889 break;
17891 case LABEL_REF:
17892 case CONST:
17893 /* We might be able to use ldrd %0, %1 here. However, the range is
17894 different to ldr/adr, and it is broken on some ARMv7-M
17895 implementations. */
17896 /* Use the second register of the pair to avoid problematic
17897 overlap. */
17898 otherops[1] = operands[1];
17899 if (emit)
17900 output_asm_insn ("adr%?\t%0, %1", otherops);
17901 operands[1] = otherops[0];
17902 if (emit)
17904 if (TARGET_LDRD)
17905 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17906 else
17907 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17910 if (count)
17911 *count = 2;
17912 break;
17914 /* ??? This needs checking for thumb2. */
17915 default:
17916 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17917 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17919 otherops[0] = operands[0];
17920 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17921 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17923 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17925 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17927 switch ((int) INTVAL (otherops[2]))
17929 case -8:
17930 if (emit)
17931 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17932 return "";
17933 case -4:
17934 if (TARGET_THUMB2)
17935 break;
17936 if (emit)
17937 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17938 return "";
17939 case 4:
17940 if (TARGET_THUMB2)
17941 break;
17942 if (emit)
17943 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17944 return "";
17947 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17948 operands[1] = otherops[0];
17949 if (TARGET_LDRD
17950 && (REG_P (otherops[2])
17951 || TARGET_THUMB2
17952 || (CONST_INT_P (otherops[2])
17953 && INTVAL (otherops[2]) > -256
17954 && INTVAL (otherops[2]) < 256)))
17956 if (reg_overlap_mentioned_p (operands[0],
17957 otherops[2]))
17959 rtx tmp;
17960 /* Swap base and index registers over to
17961 avoid a conflict. */
17962 tmp = otherops[1];
17963 otherops[1] = otherops[2];
17964 otherops[2] = tmp;
17966 /* If both registers conflict, it will usually
17967 have been fixed by a splitter. */
17968 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17969 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17971 if (emit)
17973 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17974 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17976 if (count)
17977 *count = 2;
17979 else
17981 otherops[0] = operands[0];
17982 if (emit)
17983 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17985 return "";
17988 if (CONST_INT_P (otherops[2]))
17990 if (emit)
17992 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17993 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17994 else
17995 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17998 else
18000 if (emit)
18001 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18004 else
18006 if (emit)
18007 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18010 if (count)
18011 *count = 2;
18013 if (TARGET_LDRD)
18014 return "ldr%(d%)\t%0, [%1]";
18016 return "ldm%(ia%)\t%1, %M0";
18018 else
18020 otherops[1] = adjust_address (operands[1], SImode, 4);
18021 /* Take care of overlapping base/data reg. */
18022 if (reg_mentioned_p (operands[0], operands[1]))
18024 if (emit)
18026 output_asm_insn ("ldr%?\t%0, %1", otherops);
18027 output_asm_insn ("ldr%?\t%0, %1", operands);
18029 if (count)
18030 *count = 2;
18033 else
18035 if (emit)
18037 output_asm_insn ("ldr%?\t%0, %1", operands);
18038 output_asm_insn ("ldr%?\t%0, %1", otherops);
18040 if (count)
18041 *count = 2;
18046 else
18048 /* Constraints should ensure this. */
18049 gcc_assert (code0 == MEM && code1 == REG);
18050 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18051 || (TARGET_ARM && TARGET_LDRD));
18053 switch (GET_CODE (XEXP (operands[0], 0)))
18055 case REG:
18056 if (emit)
18058 if (TARGET_LDRD)
18059 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18060 else
18061 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18063 break;
18065 case PRE_INC:
18066 gcc_assert (TARGET_LDRD);
18067 if (emit)
18068 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18069 break;
18071 case PRE_DEC:
18072 if (emit)
18074 if (TARGET_LDRD)
18075 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18076 else
18077 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18079 break;
18081 case POST_INC:
18082 if (emit)
18084 if (TARGET_LDRD)
18085 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18086 else
18087 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18089 break;
18091 case POST_DEC:
18092 gcc_assert (TARGET_LDRD);
18093 if (emit)
18094 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18095 break;
18097 case PRE_MODIFY:
18098 case POST_MODIFY:
18099 otherops[0] = operands[1];
18100 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18101 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18103 /* IWMMXT allows offsets larger than ldrd can handle,
18104 fix these up with a pair of ldr. */
18105 if (!TARGET_THUMB2
18106 && CONST_INT_P (otherops[2])
18107 && (INTVAL(otherops[2]) <= -256
18108 || INTVAL(otherops[2]) >= 256))
18110 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18112 if (emit)
18114 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18115 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18117 if (count)
18118 *count = 2;
18120 else
18122 if (emit)
18124 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18125 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18127 if (count)
18128 *count = 2;
18131 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18133 if (emit)
18134 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18136 else
18138 if (emit)
18139 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18141 break;
18143 case PLUS:
18144 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18145 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18147 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18149 case -8:
18150 if (emit)
18151 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18152 return "";
18154 case -4:
18155 if (TARGET_THUMB2)
18156 break;
18157 if (emit)
18158 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18159 return "";
18161 case 4:
18162 if (TARGET_THUMB2)
18163 break;
18164 if (emit)
18165 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18166 return "";
18169 if (TARGET_LDRD
18170 && (REG_P (otherops[2])
18171 || TARGET_THUMB2
18172 || (CONST_INT_P (otherops[2])
18173 && INTVAL (otherops[2]) > -256
18174 && INTVAL (otherops[2]) < 256)))
18176 otherops[0] = operands[1];
18177 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18178 if (emit)
18179 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18180 return "";
18182 /* Fall through */
18184 default:
18185 otherops[0] = adjust_address (operands[0], SImode, 4);
18186 otherops[1] = operands[1];
18187 if (emit)
18189 output_asm_insn ("str%?\t%1, %0", operands);
18190 output_asm_insn ("str%?\t%H1, %0", otherops);
18192 if (count)
18193 *count = 2;
18197 return "";
18200 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18201 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
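/* Illustrative cases: a load from a register address becomes
   "ldmia r4, {r0, r1, r2, r3}", a store becomes "stmia r4, {r0, r1, r2, r3}",
   and a register-to-register move falls back to four "mov" instructions
   ordered so that overlapping halves are not clobbered.  */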
18203 const char *
18204 output_move_quad (rtx *operands)
18206 if (REG_P (operands[0]))
18208 /* Load, or reg->reg move. */
18210 if (MEM_P (operands[1]))
18212 switch (GET_CODE (XEXP (operands[1], 0)))
18214 case REG:
18215 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18216 break;
18218 case LABEL_REF:
18219 case CONST:
18220 output_asm_insn ("adr%?\t%0, %1", operands);
18221 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18222 break;
18224 default:
18225 gcc_unreachable ();
18228 else
18230 rtx ops[2];
18231 int dest, src, i;
18233 gcc_assert (REG_P (operands[1]));
18235 dest = REGNO (operands[0]);
18236 src = REGNO (operands[1]);
18238 /* This seems pretty dumb, but hopefully GCC won't try to do it
18239 very often. */
18240 if (dest < src)
18241 for (i = 0; i < 4; i++)
18243 ops[0] = gen_rtx_REG (SImode, dest + i);
18244 ops[1] = gen_rtx_REG (SImode, src + i);
18245 output_asm_insn ("mov%?\t%0, %1", ops);
18247 else
18248 for (i = 3; i >= 0; i--)
18250 ops[0] = gen_rtx_REG (SImode, dest + i);
18251 ops[1] = gen_rtx_REG (SImode, src + i);
18252 output_asm_insn ("mov%?\t%0, %1", ops);
18256 else
18258 gcc_assert (MEM_P (operands[0]));
18259 gcc_assert (REG_P (operands[1]));
18260 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18262 switch (GET_CODE (XEXP (operands[0], 0)))
18264 case REG:
18265 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18266 break;
18268 default:
18269 gcc_unreachable ();
18273 return "";
18276 /* Output a VFP load or store instruction. */
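/* Example outputs (illustrative): a DFmode load from [r4] prints as
   "fldd d8, [r4]" and an SFmode store as "fsts s0, [r4]"; pre-decrement
   and post-increment addresses instead use the fstmdb/fldmia style
   templates below, with the base register updated in place.  */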
18278 const char *
18279 output_move_vfp (rtx *operands)
18281 rtx reg, mem, addr, ops[2];
18282 int load = REG_P (operands[0]);
18283 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18284 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18285 const char *templ;
18286 char buff[50];
18287 enum machine_mode mode;
18289 reg = operands[!load];
18290 mem = operands[load];
18292 mode = GET_MODE (reg);
18294 gcc_assert (REG_P (reg));
18295 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18296 gcc_assert (mode == SFmode
18297 || mode == DFmode
18298 || mode == SImode
18299 || mode == DImode
18300 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18301 gcc_assert (MEM_P (mem));
18303 addr = XEXP (mem, 0);
18305 switch (GET_CODE (addr))
18307 case PRE_DEC:
18308 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18309 ops[0] = XEXP (addr, 0);
18310 ops[1] = reg;
18311 break;
18313 case POST_INC:
18314 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18315 ops[0] = XEXP (addr, 0);
18316 ops[1] = reg;
18317 break;
18319 default:
18320 templ = "f%s%c%%?\t%%%s0, %%1%s";
18321 ops[0] = reg;
18322 ops[1] = mem;
18323 break;
18326 sprintf (buff, templ,
18327 load ? "ld" : "st",
18328 dp ? 'd' : 's',
18329 dp ? "P" : "",
18330 integer_p ? "\t%@ int" : "");
18331 output_asm_insn (buff, ops);
18333 return "";
18336 /* Output a Neon double-word or quad-word load or store, or a load
18337 or store for larger structure modes.
18339 WARNING: The ordering of elements is weird in big-endian mode,
18340 because the EABI requires that vectors stored in memory appear
18341 as though they were stored by a VSTM instruction.
18342 GCC RTL defines element ordering based on in-memory order.
18343 This can be different from the architectural ordering of elements
18344 within a NEON register. The intrinsics defined in arm_neon.h use the
18345 NEON register element ordering, not the GCC RTL element ordering.
18347 For example, the in-memory ordering of a big-endian quadword
18348 vector with 16-bit elements when stored from register pair {d0,d1}
18349 will be (lowest address first, d0[N] is NEON register element N):
18351 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18353 When necessary, quadword registers (dN, dN+1) are moved to ARM
18354 registers from rN in the order:
18356 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18358 So that STM/LDM can be used on vectors in ARM registers, and the
18359 same memory layout will result as if VSTM/VLDM were used.
18361 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18362 possible, which allows use of appropriate alignment tags.
18363 Note that the choice of "64" is independent of the actual vector
18364 element size; this size simply ensures that the behavior is
18365 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18367 Due to limitations of those instructions, use of VST1.64/VLD1.64
18368 is not possible if:
18369 - the address contains PRE_DEC, or
18370 - the mode refers to more than 4 double-word registers
18372 In those cases, it would be possible to replace VSTM/VLDM by a
18373 sequence of instructions; this is not currently implemented since
18374 this is not certain to actually improve performance. */
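/* Example outputs (approximate; the exact operand printing depends on the
   address and alignment): a quad-word load from [r4] prints roughly as
   "vld1.64 {d0, d1}, [r4:64]", structure modes wider than four D registers
   fall back to "vldmia r4, {d0-d7}", and pre-decrement addresses always
   use the vstmdb/vldmdb form.  */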
18376 const char *
18377 output_move_neon (rtx *operands)
18379 rtx reg, mem, addr, ops[2];
18380 int regno, nregs, load = REG_P (operands[0]);
18381 const char *templ;
18382 char buff[50];
18383 enum machine_mode mode;
18385 reg = operands[!load];
18386 mem = operands[load];
18388 mode = GET_MODE (reg);
18390 gcc_assert (REG_P (reg));
18391 regno = REGNO (reg);
18392 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18393 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18394 || NEON_REGNO_OK_FOR_QUAD (regno));
18395 gcc_assert (VALID_NEON_DREG_MODE (mode)
18396 || VALID_NEON_QREG_MODE (mode)
18397 || VALID_NEON_STRUCT_MODE (mode));
18398 gcc_assert (MEM_P (mem));
18400 addr = XEXP (mem, 0);
18402 /* Strip off const from addresses like (const (plus (...))). */
18403 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18404 addr = XEXP (addr, 0);
18406 switch (GET_CODE (addr))
18408 case POST_INC:
18409 /* We have to use vldm / vstm for too-large modes. */
18410 if (nregs > 4)
18412 templ = "v%smia%%?\t%%0!, %%h1";
18413 ops[0] = XEXP (addr, 0);
18415 else
18417 templ = "v%s1.64\t%%h1, %%A0";
18418 ops[0] = mem;
18420 ops[1] = reg;
18421 break;
18423 case PRE_DEC:
18424 /* We have to use vldm / vstm in this case, since there is no
18425 pre-decrement form of the vld1 / vst1 instructions. */
18426 templ = "v%smdb%%?\t%%0!, %%h1";
18427 ops[0] = XEXP (addr, 0);
18428 ops[1] = reg;
18429 break;
18431 case POST_MODIFY:
18432 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18433 gcc_unreachable ();
18435 case LABEL_REF:
18436 case PLUS:
18438 int i;
18439 int overlap = -1;
18440 for (i = 0; i < nregs; i++)
18442 /* We're only using DImode here because it's a convenient size. */
18443 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18444 ops[1] = adjust_address (mem, DImode, 8 * i);
18445 if (reg_overlap_mentioned_p (ops[0], mem))
18447 gcc_assert (overlap == -1);
18448 overlap = i;
18450 else
18452 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18453 output_asm_insn (buff, ops);
18456 if (overlap != -1)
18458 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18459 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18460 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18461 output_asm_insn (buff, ops);
18464 return "";
18467 default:
18468 /* We have to use vldm / vstm for too-large modes. */
18469 if (nregs > 4)
18470 templ = "v%smia%%?\t%%m0, %%h1";
18471 else
18472 templ = "v%s1.64\t%%h1, %%A0";
18474 ops[0] = mem;
18475 ops[1] = reg;
18478 sprintf (buff, templ, load ? "ld" : "st");
18479 output_asm_insn (buff, ops);
18481 return "";
18484 /* Compute and return the length of neon_mov<mode>, where <mode> is
18485 one of VSTRUCT modes: EI, OI, CI or XI. */
18487 arm_attr_length_move_neon (rtx insn)
18489 rtx reg, mem, addr;
18490 int load;
18491 enum machine_mode mode;
18493 extract_insn_cached (insn);
18495 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18497 mode = GET_MODE (recog_data.operand[0]);
18498 switch (mode)
18500 case EImode:
18501 case OImode:
18502 return 8;
18503 case CImode:
18504 return 12;
18505 case XImode:
18506 return 16;
18507 default:
18508 gcc_unreachable ();
18512 load = REG_P (recog_data.operand[0]);
18513 reg = recog_data.operand[!load];
18514 mem = recog_data.operand[load];
18516 gcc_assert (MEM_P (mem));
18518 mode = GET_MODE (reg);
18519 addr = XEXP (mem, 0);
18521 /* Strip off const from addresses like (const (plus (...))). */
18522 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18523 addr = XEXP (addr, 0);
18525 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18527 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18528 return insns * 4;
18530 else
18531 return 4;
18534 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18535 return zero. */
18538 arm_address_offset_is_imm (rtx insn)
18540 rtx mem, addr;
18542 extract_insn_cached (insn);
18544 if (REG_P (recog_data.operand[0]))
18545 return 0;
18547 mem = recog_data.operand[0];
18549 gcc_assert (MEM_P (mem));
18551 addr = XEXP (mem, 0);
18553 if (REG_P (addr)
18554 || (GET_CODE (addr) == PLUS
18555 && REG_P (XEXP (addr, 0))
18556 && CONST_INT_P (XEXP (addr, 1))))
18557 return 1;
18558 else
18559 return 0;
18562 /* Output an ADD r, s, #n where n may be too big for one instruction.
18563 If adding zero to one register, output nothing. */
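/* Worked example (hypothetical operands): adding #0x1001 to r1 with r0 as
   the destination cannot be encoded in one immediate, so this emits
   "add r0, r1, #1" followed by "add r0, r0, #4096"; a negative addend uses
   "sub" in the same way.  */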
18564 const char *
18565 output_add_immediate (rtx *operands)
18567 HOST_WIDE_INT n = INTVAL (operands[2]);
18569 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18571 if (n < 0)
18572 output_multi_immediate (operands,
18573 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18574 -n);
18575 else
18576 output_multi_immediate (operands,
18577 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18581 return "";
18584 /* Output a multiple immediate operation.
18585 OPERANDS is the vector of operands referred to in the output patterns.
18586 INSTR1 is the output pattern to use for the first constant.
18587 INSTR2 is the output pattern to use for subsequent constants.
18588 IMMED_OP is the index of the constant slot in OPERANDS.
18589 N is the constant value. */
18590 static const char *
18591 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18592 int immed_op, HOST_WIDE_INT n)
18594 #if HOST_BITS_PER_WIDE_INT > 32
18595 n &= 0xffffffff;
18596 #endif
18598 if (n == 0)
18600 /* Quick and easy output. */
18601 operands[immed_op] = const0_rtx;
18602 output_asm_insn (instr1, operands);
18604 else
18606 int i;
18607 const char * instr = instr1;
18609 /* Note that n is never zero here (which would give no output). */
18610 for (i = 0; i < 32; i += 2)
18612 if (n & (3 << i))
18614 operands[immed_op] = GEN_INT (n & (255 << i));
18615 output_asm_insn (instr, operands);
18616 instr = instr2;
18617 i += 6;
18622 return "";
18625 /* Return the name of a shifter operation. */
18626 static const char *
18627 arm_shift_nmem(enum rtx_code code)
18629 switch (code)
18631 case ASHIFT:
18632 return ARM_LSL_NAME;
18634 case ASHIFTRT:
18635 return "asr";
18637 case LSHIFTRT:
18638 return "lsr";
18640 case ROTATERT:
18641 return "ror";
18643 default:
18644 abort();
18648 /* Return the appropriate ARM instruction for the operation code.
18649 The returned result should not be overwritten. OP is the rtx of the
18650 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18651 was shifted. */
18652 const char *
18653 arithmetic_instr (rtx op, int shift_first_arg)
18655 switch (GET_CODE (op))
18657 case PLUS:
18658 return "add";
18660 case MINUS:
18661 return shift_first_arg ? "rsb" : "sub";
18663 case IOR:
18664 return "orr";
18666 case XOR:
18667 return "eor";
18669 case AND:
18670 return "and";
18672 case ASHIFT:
18673 case ASHIFTRT:
18674 case LSHIFTRT:
18675 case ROTATERT:
18676 return arm_shift_nmem(GET_CODE(op));
18678 default:
18679 gcc_unreachable ();
18683 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18684 for the operation code. The returned result should not be overwritten.
18685 OP is the rtx of the shift.
18686 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
18687 holds the constant shift amount. */
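/* For example: (ashift x (const_int 3)) and (mult x (const_int 8)) both
   yield "lsl" with *AMOUNTP == 3, (ashiftrt x (reg r2)) yields "asr" with
   *AMOUNTP == -1, and a constant shift of zero makes the function return
   NULL so that no shift is printed at all.  */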
18688 static const char *
18689 shift_op (rtx op, HOST_WIDE_INT *amountp)
18691 const char * mnem;
18692 enum rtx_code code = GET_CODE (op);
18694 switch (code)
18696 case ROTATE:
18697 if (!CONST_INT_P (XEXP (op, 1)))
18699 output_operand_lossage ("invalid shift operand");
18700 return NULL;
18703 code = ROTATERT;
18704 *amountp = 32 - INTVAL (XEXP (op, 1));
18705 mnem = "ror";
18706 break;
18708 case ASHIFT:
18709 case ASHIFTRT:
18710 case LSHIFTRT:
18711 case ROTATERT:
18712 mnem = arm_shift_nmem(code);
18713 if (CONST_INT_P (XEXP (op, 1)))
18715 *amountp = INTVAL (XEXP (op, 1));
18717 else if (REG_P (XEXP (op, 1)))
18719 *amountp = -1;
18720 return mnem;
18722 else
18724 output_operand_lossage ("invalid shift operand");
18725 return NULL;
18727 break;
18729 case MULT:
18730 /* We never have to worry about the amount being other than a
18731 power of 2, since this case can never be reloaded from a reg. */
18732 if (!CONST_INT_P (XEXP (op, 1)))
18734 output_operand_lossage ("invalid shift operand");
18735 return NULL;
18738 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18740 /* Amount must be a power of two. */
18741 if (*amountp & (*amountp - 1))
18743 output_operand_lossage ("invalid shift operand");
18744 return NULL;
18747 *amountp = int_log2 (*amountp);
18748 return ARM_LSL_NAME;
18750 default:
18751 output_operand_lossage ("invalid shift operand");
18752 return NULL;
18755 /* This is not 100% correct, but follows from the desire to merge
18756 multiplication by a power of 2 with the recognizer for a
18757 shift. >=32 is not a valid shift for "lsl", so we must try and
18758 output a shift that produces the correct arithmetical result.
18759 Using lsr #32 is identical except for the fact that the carry bit
18760 is not set correctly if we set the flags; but we never use the
18761 carry bit from such an operation, so we can ignore that. */
18762 if (code == ROTATERT)
18763 /* Rotate is just modulo 32. */
18764 *amountp &= 31;
18765 else if (*amountp != (*amountp & 31))
18767 if (code == ASHIFT)
18768 mnem = "lsr";
18769 *amountp = 32;
18772 /* Shifts of 0 are no-ops. */
18773 if (*amountp == 0)
18774 return NULL;
18776 return mnem;
18779 /* Obtain the shift count (log2) from the power of two POWER. */
18781 static HOST_WIDE_INT
18782 int_log2 (HOST_WIDE_INT power)
18784 HOST_WIDE_INT shift = 0;
18786 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18788 gcc_assert (shift <= 31);
18789 shift++;
18792 return shift;
18795 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18796 because /bin/as is horribly restrictive. The judgement about
18797 whether or not each character is 'printable' (and can be output as
18798 is) or not (and must be printed with an octal escape) must be made
18799 with reference to the *host* character set -- the situation is
18800 similar to that discussed in the comments above pp_c_char in
18801 c-pretty-print.c. */
18803 #define MAX_ASCII_LEN 51
18805 void
18806 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18808 int i;
18809 int len_so_far = 0;
18811 fputs ("\t.ascii\t\"", stream);
18813 for (i = 0; i < len; i++)
18815 int c = p[i];
18817 if (len_so_far >= MAX_ASCII_LEN)
18819 fputs ("\"\n\t.ascii\t\"", stream);
18820 len_so_far = 0;
18823 if (ISPRINT (c))
18825 if (c == '\\' || c == '\"')
18827 putc ('\\', stream);
18828 len_so_far++;
18830 putc (c, stream);
18831 len_so_far++;
18833 else
18835 fprintf (stream, "\\%03o", c);
18836 len_so_far += 4;
18840 fputs ("\"\n", stream);
18843 /* Compute the register save mask for registers 0 through 12
18844 inclusive. This code is used by arm_compute_save_reg_mask. */
18846 static unsigned long
18847 arm_compute_save_reg0_reg12_mask (void)
18849 unsigned long func_type = arm_current_func_type ();
18850 unsigned long save_reg_mask = 0;
18851 unsigned int reg;
18853 if (IS_INTERRUPT (func_type))
18855 unsigned int max_reg;
18856 /* Interrupt functions must not corrupt any registers,
18857 even call clobbered ones. If this is a leaf function
18858 we can just examine the registers used by the RTL, but
18859 otherwise we have to assume that whatever function is
18860 called might clobber anything, and so we have to save
18861 all the call-clobbered registers as well. */
18862 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18863 /* FIQ handlers have registers r8 - r12 banked, so
18864 we only need to check r0 - r7. Normal ISRs only
18865 bank r14 and r15, so we must check up to r12.
18866 r13 is the stack pointer which is always preserved,
18867 so we do not need to consider it here. */
18868 max_reg = 7;
18869 else
18870 max_reg = 12;
18872 for (reg = 0; reg <= max_reg; reg++)
18873 if (df_regs_ever_live_p (reg)
18874 || (! crtl->is_leaf && call_used_regs[reg]))
18875 save_reg_mask |= (1 << reg);
18877 /* Also save the pic base register if necessary. */
18878 if (flag_pic
18879 && !TARGET_SINGLE_PIC_BASE
18880 && arm_pic_register != INVALID_REGNUM
18881 && crtl->uses_pic_offset_table)
18882 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18884 else if (IS_VOLATILE(func_type))
18886 /* For noreturn functions we historically omitted register saves
18887 altogether. However this really messes up debugging. As a
18888 compromise save just the frame pointers. Combined with the link
18889 register saved elsewhere this should be sufficient to get
18890 a backtrace. */
18891 if (frame_pointer_needed)
18892 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18893 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18894 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18895 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18896 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18898 else
18900 /* In the normal case we only need to save those registers
18901 which are call saved and which are used by this function. */
18902 for (reg = 0; reg <= 11; reg++)
18903 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18904 save_reg_mask |= (1 << reg);
18906 /* Handle the frame pointer as a special case. */
18907 if (frame_pointer_needed)
18908 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18910 /* If we aren't loading the PIC register,
18911 don't stack it even though it may be live. */
18912 if (flag_pic
18913 && !TARGET_SINGLE_PIC_BASE
18914 && arm_pic_register != INVALID_REGNUM
18915 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18916 || crtl->uses_pic_offset_table))
18917 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18919 /* The prologue will copy SP into R0, so save it. */
18920 if (IS_STACKALIGN (func_type))
18921 save_reg_mask |= 1;
18924 /* Save registers so the exception handler can modify them. */
18925 if (crtl->calls_eh_return)
18927 unsigned int i;
18929 for (i = 0; ; i++)
18931 reg = EH_RETURN_DATA_REGNO (i);
18932 if (reg == INVALID_REGNUM)
18933 break;
18934 save_reg_mask |= 1 << reg;
18938 return save_reg_mask;
18941 /* Return true if r3 is live at the start of the function. */
18943 static bool
18944 arm_r3_live_at_start_p (void)
18946 /* Just look at cfg info, which is still close enough to correct at this
18947 point. This gives false positives for broken functions that might use
18948 uninitialized data that happens to be allocated in r3, but who cares? */
18949 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18952 /* Compute the number of bytes used to store the static chain register on the
18953 stack, above the stack frame. We need to know this accurately to get the
18954 alignment of the rest of the stack frame correct. */
18956 static int
18957 arm_compute_static_chain_stack_bytes (void)
18959 /* See the defining assertion in arm_expand_prologue. */
18960 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18961 && IS_NESTED (arm_current_func_type ())
18962 && arm_r3_live_at_start_p ()
18963 && crtl->args.pretend_args_size == 0)
18964 return 4;
18966 return 0;
18969 /* Compute a bit mask of which registers need to be
18970 saved on the stack for the current function.
18971 This is used by arm_get_frame_offsets, which may add extra registers. */
18973 static unsigned long
18974 arm_compute_save_reg_mask (void)
18976 unsigned int save_reg_mask = 0;
18977 unsigned long func_type = arm_current_func_type ();
18978 unsigned int reg;
18980 if (IS_NAKED (func_type))
18981 /* This should never really happen. */
18982 return 0;
18984 /* If we are creating a stack frame, then we must save the frame pointer,
18985 IP (which will hold the old stack pointer), LR and the PC. */
18986 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18987 save_reg_mask |=
18988 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18989 | (1 << IP_REGNUM)
18990 | (1 << LR_REGNUM)
18991 | (1 << PC_REGNUM);
18993 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18995 /* Decide if we need to save the link register.
18996 Interrupt routines have their own banked link register,
18997 so they never need to save it.
18998 Otherwise if we do not use the link register we do not need to save
18999 it. If we are pushing other registers onto the stack however, we
19000 can save an instruction in the epilogue by pushing the link register
19001 now and then popping it back into the PC. This incurs extra memory
19002 accesses though, so we only do it when optimizing for size, and only
19003 if we know that we will not need a fancy return sequence. */
19004 if (df_regs_ever_live_p (LR_REGNUM)
19005 || (save_reg_mask
19006 && optimize_size
19007 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19008 && !crtl->calls_eh_return))
19009 save_reg_mask |= 1 << LR_REGNUM;
19011 if (cfun->machine->lr_save_eliminated)
19012 save_reg_mask &= ~ (1 << LR_REGNUM);
19014 if (TARGET_REALLY_IWMMXT
19015 && ((bit_count (save_reg_mask)
19016 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19017 arm_compute_static_chain_stack_bytes())
19018 ) % 2) != 0)
19020 /* The total number of registers that are going to be pushed
19021 onto the stack is odd. We need to ensure that the stack
19022 is 64-bit aligned before we start to save iWMMXt registers,
19023 and also before we start to create locals. (A local variable
19024 might be a double or long long which we will load/store using
19025 an iWMMXt instruction). Therefore we need to push another
19026 ARM register, so that the stack will be 64-bit aligned. We
19027 try to avoid using the arg registers (r0 -r3) as they might be
19028 used to pass values in a tail call. */
19029 for (reg = 4; reg <= 12; reg++)
19030 if ((save_reg_mask & (1 << reg)) == 0)
19031 break;
19033 if (reg <= 12)
19034 save_reg_mask |= (1 << reg);
19035 else
19037 cfun->machine->sibcall_blocked = 1;
19038 save_reg_mask |= (1 << 3);
19042 /* We may need to push an additional register for use initializing the
19043 PIC base register. */
19044 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19045 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19047 reg = thumb_find_work_register (1 << 4);
19048 if (!call_used_regs[reg])
19049 save_reg_mask |= (1 << reg);
19052 return save_reg_mask;
19056 /* Compute a bit mask of which registers need to be
19057 saved on the stack for the current function. */
19058 static unsigned long
19059 thumb1_compute_save_reg_mask (void)
19061 unsigned long mask;
19062 unsigned reg;
19064 mask = 0;
19065 for (reg = 0; reg < 12; reg ++)
19066 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19067 mask |= 1 << reg;
19069 if (flag_pic
19070 && !TARGET_SINGLE_PIC_BASE
19071 && arm_pic_register != INVALID_REGNUM
19072 && crtl->uses_pic_offset_table)
19073 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19075 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19076 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19077 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19079 /* LR will also be pushed if any lo regs are pushed. */
19080 if (mask & 0xff || thumb_force_lr_save ())
19081 mask |= (1 << LR_REGNUM);
19083 /* Make sure we have a low work register if we need one.
19084 We will need one if we are going to push a high register,
19085 but we are not currently intending to push a low register. */
19086 if ((mask & 0xff) == 0
19087 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19089 /* Use thumb_find_work_register to choose which register
19090 we will use. If the register is live then we will
19091 have to push it. Use LAST_LO_REGNUM as our fallback
19092 choice for the register to select. */
19093 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19094 /* Make sure the register returned by thumb_find_work_register is
19095 not part of the return value. */
19096 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19097 reg = LAST_LO_REGNUM;
19099 if (! call_used_regs[reg])
19100 mask |= 1 << reg;
19103 /* The 504 below is 8 bytes less than 512 because there are two possible
19104 alignment words. We can't tell here if they will be present or not so we
19105 have to play it safe and assume that they are. */
19106 if ((CALLER_INTERWORKING_SLOT_SIZE +
19107 ROUND_UP_WORD (get_frame_size ()) +
19108 crtl->outgoing_args_size) >= 504)
19110 /* This is the same as the code in thumb1_expand_prologue() which
19111 determines which register to use for stack decrement. */
19112 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19113 if (mask & (1 << reg))
19114 break;
19116 if (reg > LAST_LO_REGNUM)
19118 /* Make sure we have a register available for stack decrement. */
19119 mask |= 1 << LAST_LO_REGNUM;
19123 return mask;
19127 /* Return the number of bytes required to save VFP registers. */
19128 static int
19129 arm_get_vfp_saved_size (void)
19131 unsigned int regno;
19132 int count;
19133 int saved;
19135 saved = 0;
19136 /* Space for saved VFP registers. */
19137 if (TARGET_HARD_FLOAT && TARGET_VFP)
19139 count = 0;
19140 for (regno = FIRST_VFP_REGNUM;
19141 regno < LAST_VFP_REGNUM;
19142 regno += 2)
19144 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19145 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19147 if (count > 0)
19149 /* Work around the ARM10 VFPr1 bug. */
19150 if (count == 2 && !arm_arch6)
19151 count++;
19152 saved += count * 8;
19154 count = 0;
19156 else
19157 count++;
19159 if (count > 0)
19161 if (count == 2 && !arm_arch6)
19162 count++;
19163 saved += count * 8;
19166 return saved;
19170 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19171 everything bar the final return instruction. If SIMPLE_RETURN is true,
19172 then do not output the epilogue, because it has already been emitted in RTL. */
19173 const char *
19174 output_return_instruction (rtx operand, bool really_return, bool reverse,
19175 bool simple_return)
19177 char conditional[10];
19178 char instr[100];
19179 unsigned reg;
19180 unsigned long live_regs_mask;
19181 unsigned long func_type;
19182 arm_stack_offsets *offsets;
19184 func_type = arm_current_func_type ();
19186 if (IS_NAKED (func_type))
19187 return "";
19189 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19191 /* If this function was declared non-returning, and we have
19192 found a tail call, then we have to trust that the called
19193 function won't return. */
19194 if (really_return)
19196 rtx ops[2];
19198 /* Otherwise, trap an attempted return by aborting. */
19199 ops[0] = operand;
19200 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19201 : "abort");
19202 assemble_external_libcall (ops[1]);
19203 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19206 return "";
19209 gcc_assert (!cfun->calls_alloca || really_return);
19211 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19213 cfun->machine->return_used_this_function = 1;
19215 offsets = arm_get_frame_offsets ();
19216 live_regs_mask = offsets->saved_regs_mask;
19218 if (!simple_return && live_regs_mask)
19220 const char * return_reg;
19222 /* If we do not have any special requirements for function exit
19223 (e.g. interworking) then we can load the return address
19224 directly into the PC. Otherwise we must load it into LR. */
19225 if (really_return
19226 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19227 return_reg = reg_names[PC_REGNUM];
19228 else
19229 return_reg = reg_names[LR_REGNUM];
19231 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19233 /* There are three possible reasons for the IP register
19234 being saved. 1) a stack frame was created, in which case
19235 IP contains the old stack pointer, or 2) an ISR routine
19236 corrupted it, or 3) it was saved to align the stack on
19237 iWMMXt. In case 1, restore IP into SP, otherwise just
19238 restore IP. */
19239 if (frame_pointer_needed)
19241 live_regs_mask &= ~ (1 << IP_REGNUM);
19242 live_regs_mask |= (1 << SP_REGNUM);
19244 else
19245 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19248 /* On some ARM architectures it is faster to use LDR rather than
19249 LDM to load a single register. On other architectures, the
19250 cost is the same. In 26 bit mode, or for exception handlers,
19251 we have to use LDM to load the PC so that the CPSR is also
19252 restored. */
19253 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19254 if (live_regs_mask == (1U << reg))
19255 break;
19257 if (reg <= LAST_ARM_REGNUM
19258 && (reg != LR_REGNUM
19259 || ! really_return
19260 || ! IS_INTERRUPT (func_type)))
19262 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19263 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19265 else
19267 char *p;
19268 int first = 1;
19270 /* Generate the load multiple instruction to restore the
19271 registers. Note we can get here, even if
19272 frame_pointer_needed is true, but only if sp already
19273 points to the base of the saved core registers. */
19274 if (live_regs_mask & (1 << SP_REGNUM))
19276 unsigned HOST_WIDE_INT stack_adjust;
19278 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19279 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19281 if (stack_adjust && arm_arch5 && TARGET_ARM)
19282 if (TARGET_UNIFIED_ASM)
19283 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19284 else
19285 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19286 else
19288 /* If we can't use ldmib (SA110 bug),
19289 then try to pop r3 instead. */
19290 if (stack_adjust)
19291 live_regs_mask |= 1 << 3;
19293 if (TARGET_UNIFIED_ASM)
19294 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19295 else
19296 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19299 else
19300 if (TARGET_UNIFIED_ASM)
19301 sprintf (instr, "pop%s\t{", conditional);
19302 else
19303 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19305 p = instr + strlen (instr);
19307 for (reg = 0; reg <= SP_REGNUM; reg++)
19308 if (live_regs_mask & (1 << reg))
19310 int l = strlen (reg_names[reg]);
19312 if (first)
19313 first = 0;
19314 else
19316 memcpy (p, ", ", 2);
19317 p += 2;
19320 memcpy (p, "%|", 2);
19321 memcpy (p + 2, reg_names[reg], l);
19322 p += l + 2;
19325 if (live_regs_mask & (1 << LR_REGNUM))
19327 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19328 /* If returning from an interrupt, restore the CPSR. */
19329 if (IS_INTERRUPT (func_type))
19330 strcat (p, "^");
19332 else
19333 strcpy (p, "}");
19336 output_asm_insn (instr, & operand);
19338 /* See if we need to generate an extra instruction to
19339 perform the actual function return. */
19340 if (really_return
19341 && func_type != ARM_FT_INTERWORKED
19342 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19344 /* The return has already been handled
19345 by loading the LR into the PC. */
19346 return "";
19350 if (really_return)
19352 switch ((int) ARM_FUNC_TYPE (func_type))
19354 case ARM_FT_ISR:
19355 case ARM_FT_FIQ:
19356 /* ??? This is wrong for unified assembly syntax. */
19357 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19358 break;
19360 case ARM_FT_INTERWORKED:
19361 sprintf (instr, "bx%s\t%%|lr", conditional);
19362 break;
19364 case ARM_FT_EXCEPTION:
19365 /* ??? This is wrong for unified assembly syntax. */
19366 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19367 break;
19369 default:
19370 /* Use bx if it's available. */
19371 if (arm_arch5 || arm_arch4t)
19372 sprintf (instr, "bx%s\t%%|lr", conditional);
19373 else
19374 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19375 break;
19378 output_asm_insn (instr, & operand);
19381 return "";
19384 /* Write the function name into the code section, directly preceding
19385 the function prologue.
19387 Code will be output similar to this:
19389 .ascii "arm_poke_function_name", 0
19390 .align
19392 .word 0xff000000 + (t1 - t0)
19393 arm_poke_function_name
19394 mov ip, sp
19395 stmfd sp!, {fp, ip, lr, pc}
19396 sub fp, ip, #4
19398 When performing a stack backtrace, code can inspect the value
19399 of 'pc' stored at 'fp' + 0. If the trace function then looks
19400 at location pc - 12 and the top 8 bits are set, then we know
19401 that there is a function name embedded immediately preceding this
19402 location and that its length is ((pc[-3]) & ~0xff000000).
19404 We assume that pc is declared as a pointer to an unsigned long.
19406 It is of no benefit to output the function name if we are assembling
19407 a leaf function. These function types will not contain a stack
19408 backtrace structure; therefore it is not possible to determine the
19409 function name. */
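/* Worked example (assuming ROUND_UP_WORD rounds to 4 bytes): for the name
   "foo", LENGTH is 4 including the terminating NUL, so this emits those
   4 bytes as .ascii data, a word-boundary .align, and the marker word
   0xff000004.  */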
19410 void
19411 arm_poke_function_name (FILE *stream, const char *name)
19413 unsigned long alignlength;
19414 unsigned long length;
19415 rtx x;
19417 length = strlen (name) + 1;
19418 alignlength = ROUND_UP_WORD (length);
19420 ASM_OUTPUT_ASCII (stream, name, length);
19421 ASM_OUTPUT_ALIGN (stream, 2);
19422 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19423 assemble_aligned_integer (UNITS_PER_WORD, x);
19426 /* Place some comments into the assembler stream
19427 describing the current function. */
19428 static void
19429 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19431 unsigned long func_type;
19433 /* ??? Do we want to print some of the below anyway? */
19434 if (TARGET_THUMB1)
19435 return;
19437 /* Sanity check. */
19438 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19440 func_type = arm_current_func_type ();
19442 switch ((int) ARM_FUNC_TYPE (func_type))
19444 default:
19445 case ARM_FT_NORMAL:
19446 break;
19447 case ARM_FT_INTERWORKED:
19448 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19449 break;
19450 case ARM_FT_ISR:
19451 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19452 break;
19453 case ARM_FT_FIQ:
19454 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19455 break;
19456 case ARM_FT_EXCEPTION:
19457 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19458 break;
19461 if (IS_NAKED (func_type))
19462 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19464 if (IS_VOLATILE (func_type))
19465 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19467 if (IS_NESTED (func_type))
19468 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19469 if (IS_STACKALIGN (func_type))
19470 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19472 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19473 crtl->args.size,
19474 crtl->args.pretend_args_size, frame_size);
19476 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19477 frame_pointer_needed,
19478 cfun->machine->uses_anonymous_args);
19480 if (cfun->machine->lr_save_eliminated)
19481 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19483 if (crtl->calls_eh_return)
19484 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19488 static void
19489 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19490 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19492 arm_stack_offsets *offsets;
19494 if (TARGET_THUMB1)
19496 int regno;
19498 /* Emit any call-via-reg trampolines that are needed for v4t support
19499 of call_reg and call_value_reg type insns. */
19500 for (regno = 0; regno < LR_REGNUM; regno++)
19502 rtx label = cfun->machine->call_via[regno];
19504 if (label != NULL)
19506 switch_to_section (function_section (current_function_decl));
19507 targetm.asm_out.internal_label (asm_out_file, "L",
19508 CODE_LABEL_NUMBER (label));
19509 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19513 /* ??? Probably not safe to set this here, since it assumes that a
19514 function will be emitted as assembly immediately after we generate
19515 RTL for it. This does not happen for inline functions. */
19516 cfun->machine->return_used_this_function = 0;
19518 else /* TARGET_32BIT */
19520 /* We need to take into account any stack-frame rounding. */
19521 offsets = arm_get_frame_offsets ();
19523 gcc_assert (!use_return_insn (FALSE, NULL)
19524 || (cfun->machine->return_used_this_function != 0)
19525 || offsets->saved_regs == offsets->outgoing_args
19526 || frame_pointer_needed);
19530 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19531 STR and STRD. If an even number of registers are being pushed, one
19532 or more STRD patterns are created for each register pair. If an
19533 odd number of registers are pushed, emit an initial STR followed by
19534 as many STRD instructions as are needed. This works best when the
19535 stack is initially 64-bit aligned (the normal case), since it
19536 ensures that each STRD is also 64-bit aligned. */
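/* Sketch (illustrative register set): pushing {r4, r5, r6} first stores r4
   with a writeback of sp by -12, then stores the r5/r6 pair at offsets 4
   and 8, so the result typically assembles to
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   together with the DWARF notes built below.  */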
19537 static void
19538 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19540 int num_regs = 0;
19541 int i;
19542 int regno;
19543 rtx par = NULL_RTX;
19544 rtx dwarf = NULL_RTX;
19545 rtx tmp;
19546 bool first = true;
19548 num_regs = bit_count (saved_regs_mask);
19550 /* Must be at least one register to save, and can't save SP or PC. */
19551 gcc_assert (num_regs > 0 && num_regs <= 14);
19552 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19553 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19555 /* Create sequence for DWARF info. All the frame-related data for
19556 debugging is held in this wrapper. */
19557 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19559 /* Describe the stack adjustment. */
19560 tmp = gen_rtx_SET (VOIDmode,
19561 stack_pointer_rtx,
19562 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19563 RTX_FRAME_RELATED_P (tmp) = 1;
19564 XVECEXP (dwarf, 0, 0) = tmp;
19566 /* Find the first register. */
19567 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19570 i = 0;
19572 /* If there's an odd number of registers to push, start off by
19573 pushing a single register. This ensures that subsequent strd
19574 operations are dword aligned (assuming that SP was originally
19575 64-bit aligned). */
19576 if ((num_regs & 1) != 0)
19578 rtx reg, mem, insn;
19580 reg = gen_rtx_REG (SImode, regno);
19581 if (num_regs == 1)
19582 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19583 stack_pointer_rtx));
19584 else
19585 mem = gen_frame_mem (Pmode,
19586 gen_rtx_PRE_MODIFY
19587 (Pmode, stack_pointer_rtx,
19588 plus_constant (Pmode, stack_pointer_rtx,
19589 -4 * num_regs)));
19591 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19592 RTX_FRAME_RELATED_P (tmp) = 1;
19593 insn = emit_insn (tmp);
19594 RTX_FRAME_RELATED_P (insn) = 1;
19595 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19596 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19597 reg);
19598 RTX_FRAME_RELATED_P (tmp) = 1;
19599 i++;
19600 regno++;
19601 XVECEXP (dwarf, 0, i) = tmp;
19602 first = false;
19605 while (i < num_regs)
19606 if (saved_regs_mask & (1 << regno))
19608 rtx reg1, reg2, mem1, mem2;
19609 rtx tmp0, tmp1, tmp2;
19610 int regno2;
19612 /* Find the register to pair with this one. */
19613 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19614 regno2++)
19617 reg1 = gen_rtx_REG (SImode, regno);
19618 reg2 = gen_rtx_REG (SImode, regno2);
19620 if (first)
19622 rtx insn;
19624 first = false;
19625 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19626 stack_pointer_rtx,
19627 -4 * num_regs));
19628 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19629 stack_pointer_rtx,
19630 -4 * (num_regs - 1)));
19631 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19632 plus_constant (Pmode, stack_pointer_rtx,
19633 -4 * (num_regs)));
19634 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19635 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19636 RTX_FRAME_RELATED_P (tmp0) = 1;
19637 RTX_FRAME_RELATED_P (tmp1) = 1;
19638 RTX_FRAME_RELATED_P (tmp2) = 1;
19639 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19640 XVECEXP (par, 0, 0) = tmp0;
19641 XVECEXP (par, 0, 1) = tmp1;
19642 XVECEXP (par, 0, 2) = tmp2;
19643 insn = emit_insn (par);
19644 RTX_FRAME_RELATED_P (insn) = 1;
19645 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19647 else
19649 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19650 stack_pointer_rtx,
19651 4 * i));
19652 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19653 stack_pointer_rtx,
19654 4 * (i + 1)));
19655 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19656 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19657 RTX_FRAME_RELATED_P (tmp1) = 1;
19658 RTX_FRAME_RELATED_P (tmp2) = 1;
19659 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19660 XVECEXP (par, 0, 0) = tmp1;
19661 XVECEXP (par, 0, 1) = tmp2;
19662 emit_insn (par);
19665 /* Create unwind information. This is an approximation. */
19666 tmp1 = gen_rtx_SET (VOIDmode,
19667 gen_frame_mem (Pmode,
19668 plus_constant (Pmode,
19669 stack_pointer_rtx,
19670 4 * i)),
19671 reg1);
19672 tmp2 = gen_rtx_SET (VOIDmode,
19673 gen_frame_mem (Pmode,
19674 plus_constant (Pmode,
19675 stack_pointer_rtx,
19676 4 * (i + 1))),
19677 reg2);
19679 RTX_FRAME_RELATED_P (tmp1) = 1;
19680 RTX_FRAME_RELATED_P (tmp2) = 1;
19681 XVECEXP (dwarf, 0, i + 1) = tmp1;
19682 XVECEXP (dwarf, 0, i + 2) = tmp2;
19683 i += 2;
19684 regno = regno2 + 1;
19686 else
19687 regno++;
19689 return;
19692 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19693 whenever possible, otherwise it emits single-word stores. The first store
19694 also allocates stack space for all saved registers, using pre-indexed
19695 addressing with writeback. All other stores use offset addressing. If no STRD
19696 can be emitted, this function emits a sequence of single-word stores,
19697 and not an STM as before, because single-word stores provide more
19698 scheduling freedom and can be turned into an STM by peephole optimizations. */
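/* Illustrative example (not the literal compiler output): for saved
   registers {r4, r5, r7} this is expected to correspond to something like
       strd    r4, r5, [sp, #-12]!
       str     r7, [sp, #8]
   i.e. the first store performs the whole stack allocation and the
   remaining stores use plain offsets from the new SP.  */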
19699 static void
19700 arm_emit_strd_push (unsigned long saved_regs_mask)
19702 int num_regs = 0;
19703 int i, j, dwarf_index = 0;
19704 int offset = 0;
19705 rtx dwarf = NULL_RTX;
19706 rtx insn = NULL_RTX;
19707 rtx tmp, mem;
19709 /* TODO: More efficient code could be emitted by changing the
19710 layout, e.g., first push all pairs that can use STRD to keep the
19711 stack aligned, and then push all other registers. */
19712 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19713 if (saved_regs_mask & (1 << i))
19714 num_regs++;
19716 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19717 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19718 gcc_assert (num_regs > 0);
19720 /* Create sequence for DWARF info. */
19721 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19723 /* For dwarf info, we generate explicit stack update. */
19724 tmp = gen_rtx_SET (VOIDmode,
19725 stack_pointer_rtx,
19726 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19727 RTX_FRAME_RELATED_P (tmp) = 1;
19728 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19730 /* Save registers. */
19731 offset = - 4 * num_regs;
19732 j = 0;
19733 while (j <= LAST_ARM_REGNUM)
19734 if (saved_regs_mask & (1 << j))
19736 if ((j % 2 == 0)
19737 && (saved_regs_mask & (1 << (j + 1))))
19739 /* The current register and the next register form a register pair for
19740 which STRD can be generated. */
19741 if (offset < 0)
19743 /* Allocate stack space for all saved registers. */
19744 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19745 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19746 mem = gen_frame_mem (DImode, tmp);
19747 offset = 0;
19749 else if (offset > 0)
19750 mem = gen_frame_mem (DImode,
19751 plus_constant (Pmode,
19752 stack_pointer_rtx,
19753 offset));
19754 else
19755 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19757 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19758 RTX_FRAME_RELATED_P (tmp) = 1;
19759 tmp = emit_insn (tmp);
19761 /* Record the first store insn. */
19762 if (dwarf_index == 1)
19763 insn = tmp;
19765 /* Generate dwarf info. */
19766 mem = gen_frame_mem (SImode,
19767 plus_constant (Pmode,
19768 stack_pointer_rtx,
19769 offset));
19770 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19771 RTX_FRAME_RELATED_P (tmp) = 1;
19772 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19774 mem = gen_frame_mem (SImode,
19775 plus_constant (Pmode,
19776 stack_pointer_rtx,
19777 offset + 4));
19778 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19779 RTX_FRAME_RELATED_P (tmp) = 1;
19780 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19782 offset += 8;
19783 j += 2;
19785 else
19787 /* Emit a single word store. */
19788 if (offset < 0)
19790 /* Allocate stack space for all saved registers. */
19791 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19792 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19793 mem = gen_frame_mem (SImode, tmp);
19794 offset = 0;
19796 else if (offset > 0)
19797 mem = gen_frame_mem (SImode,
19798 plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 offset));
19801 else
19802 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19804 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19805 RTX_FRAME_RELATED_P (tmp) = 1;
19806 tmp = emit_insn (tmp);
19808 /* Record the first store insn. */
19809 if (dwarf_index == 1)
19810 insn = tmp;
19812 /* Generate dwarf info. */
19813 mem = gen_frame_mem (SImode,
19814 plus_constant(Pmode,
19815 stack_pointer_rtx,
19816 offset));
19817 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19818 RTX_FRAME_RELATED_P (tmp) = 1;
19819 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19821 offset += 4;
19822 j += 1;
19825 else
19826 j++;
19828 /* Attach dwarf info to the first insn we generate. */
19829 gcc_assert (insn != NULL_RTX);
19830 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19831 RTX_FRAME_RELATED_P (insn) = 1;
19834 /* Generate and emit an insn that we will recognize as a push_multi.
19835 Unfortunately, since this insn does not reflect very well the actual
19836 semantics of the operation, we need to annotate the insn for the benefit
19837 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19838 MASK for registers that should be annotated for DWARF2 frame unwind
19839 information. */
19840 static rtx
19841 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19843 int num_regs = 0;
19844 int num_dwarf_regs = 0;
19845 int i, j;
19846 rtx par;
19847 rtx dwarf;
19848 int dwarf_par_index;
19849 rtx tmp, reg;
19851 /* We don't record the PC in the dwarf frame information. */
19852 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19854 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19856 if (mask & (1 << i))
19857 num_regs++;
19858 if (dwarf_regs_mask & (1 << i))
19859 num_dwarf_regs++;
19862 gcc_assert (num_regs && num_regs <= 16);
19863 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19865 /* For the body of the insn we are going to generate an UNSPEC in
19866 parallel with several USEs. This allows the insn to be recognized
19867 by the push_multi pattern in the arm.md file.
19869 The body of the insn looks something like this:
19871 (parallel [
19872 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19873 (const_int:SI <num>)))
19874 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19875 (use (reg:SI XX))
19876 (use (reg:SI YY))
19880 For the frame note however, we try to be more explicit and actually
19881 show each register being stored into the stack frame, plus a (single)
19882 decrement of the stack pointer. We do it this way in order to be
19883 friendly to the stack unwinding code, which only wants to see a single
19884 stack decrement per instruction. The RTL we generate for the note looks
19885 something like this:
19887 (sequence [
19888 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19889 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19890 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19891 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19895 FIXME: In an ideal world the PRE_MODIFY would not exist and
19896 instead we'd have a parallel expression detailing all
19897 the stores to the various memory addresses so that debug
19898 information is more up-to-date. Remember however while writing
19899 this to take care of the constraints with the push instruction.
19901 Note also that this has to be taken care of for the VFP registers.
19903 For more see PR43399. */
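/* As a rough illustration, for MASK covering {r4, r5, lr} the insn built
   below is the one the push_multi pattern prints as a single
   store-multiple, i.e. something like "push {r4, r5, lr}" (or the
   equivalent "stmfd sp!, {r4, r5, lr}").  */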
19905 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19906 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19907 dwarf_par_index = 1;
19909 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19911 if (mask & (1 << i))
19913 reg = gen_rtx_REG (SImode, i);
19915 XVECEXP (par, 0, 0)
19916 = gen_rtx_SET (VOIDmode,
19917 gen_frame_mem
19918 (BLKmode,
19919 gen_rtx_PRE_MODIFY (Pmode,
19920 stack_pointer_rtx,
19921 plus_constant
19922 (Pmode, stack_pointer_rtx,
19923 -4 * num_regs))
19925 gen_rtx_UNSPEC (BLKmode,
19926 gen_rtvec (1, reg),
19927 UNSPEC_PUSH_MULT));
19929 if (dwarf_regs_mask & (1 << i))
19931 tmp = gen_rtx_SET (VOIDmode,
19932 gen_frame_mem (SImode, stack_pointer_rtx),
19933 reg);
19934 RTX_FRAME_RELATED_P (tmp) = 1;
19935 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19938 break;
19942 for (j = 1, i++; j < num_regs; i++)
19944 if (mask & (1 << i))
19946 reg = gen_rtx_REG (SImode, i);
19948 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19950 if (dwarf_regs_mask & (1 << i))
19953 = gen_rtx_SET (VOIDmode,
19954 gen_frame_mem
19955 (SImode,
19956 plus_constant (Pmode, stack_pointer_rtx,
19957 4 * j)),
19958 reg);
19959 RTX_FRAME_RELATED_P (tmp) = 1;
19960 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19963 j++;
19967 par = emit_insn (par);
19969 tmp = gen_rtx_SET (VOIDmode,
19970 stack_pointer_rtx,
19971 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19972 RTX_FRAME_RELATED_P (tmp) = 1;
19973 XVECEXP (dwarf, 0, 0) = tmp;
19975 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19977 return par;
19980 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19981 SIZE is the offset to be adjusted.
19982 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19983 static void
19984 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19986 rtx dwarf;
19988 RTX_FRAME_RELATED_P (insn) = 1;
19989 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19990 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19993 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19994 SAVED_REGS_MASK shows which registers need to be restored.
19996 Unfortunately, since this insn does not reflect very well the actual
19997 semantics of the operation, we need to annotate the insn for the benefit
19998 of DWARF2 frame unwind information. */
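/* Illustrative example: for SAVED_REGS_MASK covering {r4, r5, lr} the
   parallel built below is printed as a single load-multiple, roughly
   "pop {r4, r5, lr}" (or "ldmfd sp!, {r4, r5, lr}"); if PC is in the mask
   the same insn also acts as the function return, e.g. "pop {r4, r5, pc}".  */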
19999 static void
20000 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20002 int num_regs = 0;
20003 int i, j;
20004 rtx par;
20005 rtx dwarf = NULL_RTX;
20006 rtx tmp, reg;
20007 bool return_in_pc;
20008 int offset_adj;
20009 int emit_update;
20011 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20012 offset_adj = return_in_pc ? 1 : 0;
20013 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20014 if (saved_regs_mask & (1 << i))
20015 num_regs++;
20017 gcc_assert (num_regs && num_regs <= 16);
20019 /* If SP is in reglist, then we don't emit SP update insn. */
20020 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20022 /* The parallel needs to hold num_regs SETs
20023 and one SET for the stack update. */
20024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20026 if (return_in_pc)
20028 tmp = ret_rtx;
20029 XVECEXP (par, 0, 0) = tmp;
20032 if (emit_update)
20034 /* Increment the stack pointer, based on there being
20035 num_regs 4-byte registers to restore. */
20036 tmp = gen_rtx_SET (VOIDmode,
20037 stack_pointer_rtx,
20038 plus_constant (Pmode,
20039 stack_pointer_rtx,
20040 4 * num_regs));
20041 RTX_FRAME_RELATED_P (tmp) = 1;
20042 XVECEXP (par, 0, offset_adj) = tmp;
20045 /* Now restore every reg, which may include PC. */
20046 for (j = 0, i = 0; j < num_regs; i++)
20047 if (saved_regs_mask & (1 << i))
20049 reg = gen_rtx_REG (SImode, i);
20050 if ((num_regs == 1) && emit_update && !return_in_pc)
20052 /* Emit single load with writeback. */
20053 tmp = gen_frame_mem (SImode,
20054 gen_rtx_POST_INC (Pmode,
20055 stack_pointer_rtx));
20056 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20057 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20058 return;
20061 tmp = gen_rtx_SET (VOIDmode,
20062 reg,
20063 gen_frame_mem
20064 (SImode,
20065 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20066 RTX_FRAME_RELATED_P (tmp) = 1;
20067 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20069 /* We need to maintain a sequence for DWARF info too. As dwarf info
20070 should not have PC, skip PC. */
20071 if (i != PC_REGNUM)
20072 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20074 j++;
20077 if (return_in_pc)
20078 par = emit_jump_insn (par);
20079 else
20080 par = emit_insn (par);
20082 REG_NOTES (par) = dwarf;
20083 if (!return_in_pc)
20084 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20085 stack_pointer_rtx, stack_pointer_rtx);
20088 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20089 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20091 Unfortunately, since this insn does not reflect very well the actual
20092 semantics of the operation, we need to annotate the insn for the benefit
20093 of DWARF2 frame unwind information. */
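/* Illustrative example: restoring two D-registers, say d8 and d9, with
   BASE_REG being the stack pointer is expected to print as something like
   "vldm sp!, {d8-d9}"; the exact mnemonic comes from whichever pattern
   matches the parallel built below.  */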
20094 static void
20095 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20097 int i, j;
20098 rtx par;
20099 rtx dwarf = NULL_RTX;
20100 rtx tmp, reg;
20102 gcc_assert (num_regs && num_regs <= 32);
20104 /* Workaround ARM10 VFPr1 bug. */
20105 if (num_regs == 2 && !arm_arch6)
20107 if (first_reg == 15)
20108 first_reg--;
20110 num_regs++;
20113 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20114 there could be up to 32 D-registers to restore.
20115 If there are more than 16 D-registers, make two recursive calls,
20116 each of which emits one pop_multi instruction. */
20117 if (num_regs > 16)
20119 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20120 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20121 return;
20124 /* The parallel needs to hold num_regs SETs
20125 and one SET for the stack update. */
20126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20128 /* Increment the stack pointer, based on there being
20129 num_regs 8-byte registers to restore. */
20130 tmp = gen_rtx_SET (VOIDmode,
20131 base_reg,
20132 plus_constant (Pmode, base_reg, 8 * num_regs));
20133 RTX_FRAME_RELATED_P (tmp) = 1;
20134 XVECEXP (par, 0, 0) = tmp;
20136 /* Now show every reg that will be restored, using a SET for each. */
20137 for (j = 0, i=first_reg; j < num_regs; i += 2)
20139 reg = gen_rtx_REG (DFmode, i);
20141 tmp = gen_rtx_SET (VOIDmode,
20142 reg,
20143 gen_frame_mem
20144 (DFmode,
20145 plus_constant (Pmode, base_reg, 8 * j)));
20146 RTX_FRAME_RELATED_P (tmp) = 1;
20147 XVECEXP (par, 0, j + 1) = tmp;
20149 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20151 j++;
20154 par = emit_insn (par);
20155 REG_NOTES (par) = dwarf;
20157 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20158 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20160 RTX_FRAME_RELATED_P (par) = 1;
20161 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20163 else
20164 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20165 base_reg, base_reg);
20168 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20169 even number of registers is being popped, multiple LDRD patterns are created,
20170 one for each register pair. If an odd number of registers is popped, the last
20171 register is loaded using an LDR pattern. */
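/* Illustrative example: for saved registers {r4, r5, r6} with no return in
   PC, the expected sequence is roughly
       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       ldr     r6, [sp], #4
   and when the function returns through PC the final load becomes
   something like "ldr pc, [sp], #4".  */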
20172 static void
20173 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20175 int num_regs = 0;
20176 int i, j;
20177 rtx par = NULL_RTX;
20178 rtx dwarf = NULL_RTX;
20179 rtx tmp, reg, tmp1;
20180 bool return_in_pc;
20182 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20183 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (saved_regs_mask & (1 << i))
20185 num_regs++;
20187 gcc_assert (num_regs && num_regs <= 16);
20189 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20190 to be popped. So, if num_regs is even, now it will become odd,
20191 and we can generate pop with PC. If num_regs is odd, it will be
20192 even now, and ldr with return can be generated for PC. */
20193 if (return_in_pc)
20194 num_regs--;
20196 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20198 /* Var j iterates over all the registers to gather all the registers in
20199 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20200 A PARALLEL RTX of a register pair is created here, so that the pattern for
20201 LDRD can be matched. As PC is always the last register to be popped, and
20202 we have already decremented num_regs if PC is to be popped, we don't have
20203 to worry about PC in this loop. */
20204 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20205 if (saved_regs_mask & (1 << j))
20207 /* Create RTX for memory load. */
20208 reg = gen_rtx_REG (SImode, j);
20209 tmp = gen_rtx_SET (SImode,
20210 reg,
20211 gen_frame_mem (SImode,
20212 plus_constant (Pmode,
20213 stack_pointer_rtx, 4 * i)));
20214 RTX_FRAME_RELATED_P (tmp) = 1;
20216 if (i % 2 == 0)
20218 /* When saved-register index (i) is even, the RTX to be emitted is
20219 yet to be created. Hence create it first. The LDRD pattern we
20220 are generating is :
20221 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20222 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20223 where target registers need not be consecutive. */
20224 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20225 dwarf = NULL_RTX;
20228 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20229 added as 0th element and if i is odd, reg_i is added as 1st element
20230 of LDRD pattern shown above. */
20231 XVECEXP (par, 0, (i % 2)) = tmp;
20232 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20234 if ((i % 2) == 1)
20236 /* When saved-register index (i) is odd, RTXs for both the registers
20237 to be loaded are generated in above given LDRD pattern, and the
20238 pattern can be emitted now. */
20239 par = emit_insn (par);
20240 REG_NOTES (par) = dwarf;
20241 RTX_FRAME_RELATED_P (par) = 1;
20244 i++;
20247 /* If the number of registers is odd AND return_in_pc is false, OR the
20248 number of registers is even AND return_in_pc is true, the last register is
20249 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20250 then use LDR with post-increment. */
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp = gen_rtx_SET (VOIDmode,
20255 stack_pointer_rtx,
20256 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20257 RTX_FRAME_RELATED_P (tmp) = 1;
20258 tmp = emit_insn (tmp);
20259 if (!return_in_pc)
20261 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20262 stack_pointer_rtx, stack_pointer_rtx);
20265 dwarf = NULL_RTX;
20267 if (((num_regs % 2) == 1 && !return_in_pc)
20268 || ((num_regs % 2) == 0 && return_in_pc))
20270 /* Scan for the single register to be popped. Skip until the saved
20271 register is found. */
20272 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20274 /* Gen LDR with post increment here. */
20275 tmp1 = gen_rtx_MEM (SImode,
20276 gen_rtx_POST_INC (SImode,
20277 stack_pointer_rtx));
20278 set_mem_alias_set (tmp1, get_frame_alias_set ());
20280 reg = gen_rtx_REG (SImode, j);
20281 tmp = gen_rtx_SET (SImode, reg, tmp1);
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20285 if (return_in_pc)
20287 /* If return_in_pc, j must be PC_REGNUM. */
20288 gcc_assert (j == PC_REGNUM);
20289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20290 XVECEXP (par, 0, 0) = ret_rtx;
20291 XVECEXP (par, 0, 1) = tmp;
20292 par = emit_jump_insn (par);
20294 else
20296 par = emit_insn (tmp);
20297 REG_NOTES (par) = dwarf;
20298 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20299 stack_pointer_rtx, stack_pointer_rtx);
20303 else if ((num_regs % 2) == 1 && return_in_pc)
20305 /* There are 2 registers to be popped. So, generate the pattern
20306 pop_multiple_with_stack_update_and_return to pop into PC. */
20307 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20310 return;
20313 /* LDRD in ARM mode needs consecutive registers as operands. This function
20314 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20315 offset addressing and then generates one separate stack update. This provides
20316 more scheduling freedom, compared to writeback on every load. However,
20317 if the function returns using load into PC directly
20318 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20319 before the last load. TODO: Add a peephole optimization to recognize
20320 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20321 peephole optimization to merge the load at stack-offset zero
20322 with the stack update instruction using load with writeback
20323 in post-index addressing mode. */
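/* Illustrative example: for saved registers {r4, r5, r6} with no load into
   PC this is expected to correspond to roughly
       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12
   with a final "ldr pc, [sp], #4" appended when PC is in SAVED_REGS_MASK.  */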
20324 static void
20325 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20327 int j = 0;
20328 int offset = 0;
20329 rtx par = NULL_RTX;
20330 rtx dwarf = NULL_RTX;
20331 rtx tmp, mem;
20333 /* Restore saved registers. */
20334 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20335 j = 0;
20336 while (j <= LAST_ARM_REGNUM)
20337 if (saved_regs_mask & (1 << j))
20339 if ((j % 2) == 0
20340 && (saved_regs_mask & (1 << (j + 1)))
20341 && (j + 1) != PC_REGNUM)
20343 /* The current register and the next register form a register pair for which
20344 LDRD can be generated. PC is always the last register popped, and
20345 we handle it separately. */
20346 if (offset > 0)
20347 mem = gen_frame_mem (DImode,
20348 plus_constant (Pmode,
20349 stack_pointer_rtx,
20350 offset));
20351 else
20352 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20354 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20355 tmp = emit_insn (tmp);
20356 RTX_FRAME_RELATED_P (tmp) = 1;
20358 /* Generate dwarf info. */
20360 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20361 gen_rtx_REG (SImode, j),
20362 NULL_RTX);
20363 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20364 gen_rtx_REG (SImode, j + 1),
20365 dwarf);
20367 REG_NOTES (tmp) = dwarf;
20369 offset += 8;
20370 j += 2;
20372 else if (j != PC_REGNUM)
20374 /* Emit a single word load. */
20375 if (offset > 0)
20376 mem = gen_frame_mem (SImode,
20377 plus_constant (Pmode,
20378 stack_pointer_rtx,
20379 offset));
20380 else
20381 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20383 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20384 tmp = emit_insn (tmp);
20385 RTX_FRAME_RELATED_P (tmp) = 1;
20387 /* Generate dwarf info. */
20388 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20389 gen_rtx_REG (SImode, j),
20390 NULL_RTX);
20392 offset += 4;
20393 j += 1;
20395 else /* j == PC_REGNUM */
20396 j++;
20398 else
20399 j++;
20401 /* Update the stack. */
20402 if (offset > 0)
20404 tmp = gen_rtx_SET (Pmode,
20405 stack_pointer_rtx,
20406 plus_constant (Pmode,
20407 stack_pointer_rtx,
20408 offset));
20409 tmp = emit_insn (tmp);
20410 arm_add_cfa_adjust_cfa_note (tmp, offset,
20411 stack_pointer_rtx, stack_pointer_rtx);
20412 offset = 0;
20415 if (saved_regs_mask & (1 << PC_REGNUM))
20417 /* Only PC is to be popped. */
20418 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20419 XVECEXP (par, 0, 0) = ret_rtx;
20420 tmp = gen_rtx_SET (SImode,
20421 gen_rtx_REG (SImode, PC_REGNUM),
20422 gen_frame_mem (SImode,
20423 gen_rtx_POST_INC (SImode,
20424 stack_pointer_rtx)));
20425 RTX_FRAME_RELATED_P (tmp) = 1;
20426 XVECEXP (par, 0, 1) = tmp;
20427 par = emit_jump_insn (par);
20429 /* Generate dwarf info. */
20430 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20431 gen_rtx_REG (SImode, PC_REGNUM),
20432 NULL_RTX);
20433 REG_NOTES (par) = dwarf;
20434 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20435 stack_pointer_rtx, stack_pointer_rtx);
20439 /* Calculate the size of the return value that is passed in registers. */
20440 static unsigned
20441 arm_size_return_regs (void)
20443 enum machine_mode mode;
20445 if (crtl->return_rtx != 0)
20446 mode = GET_MODE (crtl->return_rtx);
20447 else
20448 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20450 return GET_MODE_SIZE (mode);
20453 /* Return true if the current function needs to save/restore LR. */
20454 static bool
20455 thumb_force_lr_save (void)
20457 return !cfun->machine->lr_save_eliminated
20458 && (!leaf_function_p ()
20459 || thumb_far_jump_used_p ()
20460 || df_regs_ever_live_p (LR_REGNUM));
20463 /* We do not know whether r3 will be available when
20464 an indirect tail call happens in this
20465 particular case. */
20466 static bool
20467 is_indirect_tailcall_p (rtx call)
20469 rtx pat = PATTERN (call);
20471 /* Indirect tail call. */
20472 pat = XVECEXP (pat, 0, 0);
20473 if (GET_CODE (pat) == SET)
20474 pat = SET_SRC (pat);
20476 pat = XEXP (XEXP (pat, 0), 0);
20477 return REG_P (pat);
20480 /* Return true if r3 is used by any of the tail call insns in the
20481 current function. */
20482 static bool
20483 any_sibcall_could_use_r3 (void)
20485 edge_iterator ei;
20486 edge e;
20488 if (!crtl->tail_call_emit)
20489 return false;
20490 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20491 if (e->flags & EDGE_SIBCALL)
20493 rtx call = BB_END (e->src);
20494 if (!CALL_P (call))
20495 call = prev_nonnote_nondebug_insn (call);
20496 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20497 if (find_regno_fusage (call, USE, 3)
20498 || is_indirect_tailcall_p (call))
20499 return true;
20501 return false;
20505 /* Compute the distance from register FROM to register TO.
20506 These can be the arg pointer (26), the soft frame pointer (25),
20507 the stack pointer (13) or the hard frame pointer (11).
20508 In thumb mode r7 is used as the soft frame pointer, if needed.
20509 Typical stack layout looks like this:
20511 old stack pointer -> | |
20512 ----
20513 | | \
20514 | | saved arguments for
20515 | | vararg functions
20516 | | /
20518 hard FP & arg pointer -> | | \
20519 | | stack
20520 | | frame
20521 | | /
20523 | | \
20524 | | call saved
20525 | | registers
20526 soft frame pointer -> | | /
20528 | | \
20529 | | local
20530 | | variables
20531 locals base pointer -> | | /
20533 | | \
20534 | | outgoing
20535 | | arguments
20536 current stack pointer -> | | /
20539 For a given function some or all of these stack components
20540 may not be needed, giving rise to the possibility of
20541 eliminating some of the registers.
20543 The values returned by this function must reflect the behavior
20544 of arm_expand_prologue() and arm_compute_save_reg_mask().
20546 The sign of the number returned reflects the direction of stack
20547 growth, so the values are positive for all eliminations except
20548 from the soft frame pointer to the hard frame pointer.
20550 SFP may point just inside the local variables block to ensure correct
20551 alignment. */
20554 /* Calculate stack offsets. These are used to calculate register elimination
20555 offsets and in prologue/epilogue code. Also calculates which registers
20556 should be saved. */
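/* As a rough guide (not asserted anywhere), the computed offsets end up
   monotonically ordered:
     saved_args <= frame <= saved_regs <= soft_frame
                <= locals_base <= outgoing_args
   which roughly mirrors the stack layout diagram above: the arg pointer
   corresponds to saved_args, the soft frame pointer to soft_frame, the
   locals base to locals_base and the final stack pointer to outgoing_args.  */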
20558 static arm_stack_offsets *
20559 arm_get_frame_offsets (void)
20561 struct arm_stack_offsets *offsets;
20562 unsigned long func_type;
20563 int leaf;
20564 int saved;
20565 int core_saved;
20566 HOST_WIDE_INT frame_size;
20567 int i;
20569 offsets = &cfun->machine->stack_offsets;
20571 /* We need to know if we are a leaf function. Unfortunately, it
20572 is possible to be called after start_sequence has been called,
20573 which causes get_insns to return the insns for the sequence,
20574 not the function, which will cause leaf_function_p to return
20575 the incorrect result.
20577 Fortunately, we only need to know about leaf functions once reload has completed, and the
20578 frame size cannot be changed after that time, so we can safely
20579 use the cached value. */
20581 if (reload_completed)
20582 return offsets;
20584 /* Initially this is the size of the local variables. It will be translated
20585 into an offset once we have determined the size of preceding data. */
20586 frame_size = ROUND_UP_WORD (get_frame_size ());
20588 leaf = leaf_function_p ();
20590 /* Space for variadic functions. */
20591 offsets->saved_args = crtl->args.pretend_args_size;
20593 /* In Thumb mode this is incorrect, but never used. */
20594 offsets->frame
20595 = (offsets->saved_args
20596 + arm_compute_static_chain_stack_bytes ()
20597 + (frame_pointer_needed ? 4 : 0));
20599 if (TARGET_32BIT)
20601 unsigned int regno;
20603 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20604 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20605 saved = core_saved;
20607 /* We know that SP will be doubleword aligned on entry, and we must
20608 preserve that condition at any subroutine call. We also require the
20609 soft frame pointer to be doubleword aligned. */
20611 if (TARGET_REALLY_IWMMXT)
20613 /* Check for the call-saved iWMMXt registers. */
20614 for (regno = FIRST_IWMMXT_REGNUM;
20615 regno <= LAST_IWMMXT_REGNUM;
20616 regno++)
20617 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20618 saved += 8;
20621 func_type = arm_current_func_type ();
20622 /* Space for saved VFP registers. */
20623 if (! IS_VOLATILE (func_type)
20624 && TARGET_HARD_FLOAT && TARGET_VFP)
20625 saved += arm_get_vfp_saved_size ();
20627 else /* TARGET_THUMB1 */
20629 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20630 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20631 saved = core_saved;
20632 if (TARGET_BACKTRACE)
20633 saved += 16;
20636 /* Saved registers include the stack frame. */
20637 offsets->saved_regs
20638 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20639 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20641 /* A leaf function does not need any stack alignment if it has nothing
20642 on the stack. */
20643 if (leaf && frame_size == 0
20644 /* However if it calls alloca(), we have a dynamically allocated
20645 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20646 && ! cfun->calls_alloca)
20648 offsets->outgoing_args = offsets->soft_frame;
20649 offsets->locals_base = offsets->soft_frame;
20650 return offsets;
20653 /* Ensure SFP has the correct alignment. */
20654 if (ARM_DOUBLEWORD_ALIGN
20655 && (offsets->soft_frame & 7))
20657 offsets->soft_frame += 4;
20658 /* Try to align stack by pushing an extra reg. Don't bother doing this
20659 when there is a stack frame as the alignment will be rolled into
20660 the normal stack adjustment. */
20661 if (frame_size + crtl->outgoing_args_size == 0)
20663 int reg = -1;
20665 /* If it is safe to use r3, then do so. This sometimes
20666 generates better code on Thumb-2 by avoiding the need to
20667 use 32-bit push/pop instructions. */
20668 if (! any_sibcall_could_use_r3 ()
20669 && arm_size_return_regs () <= 12
20670 && (offsets->saved_regs_mask & (1 << 3)) == 0
20671 && (TARGET_THUMB2
20672 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20674 reg = 3;
20676 else
20677 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20679 /* Avoid fixed registers; they may be changed at
20680 arbitrary times so it's unsafe to restore them
20681 during the epilogue. */
20682 if (!fixed_regs[i]
20683 && (offsets->saved_regs_mask & (1 << i)) == 0)
20685 reg = i;
20686 break;
20690 if (reg != -1)
20692 offsets->saved_regs += 4;
20693 offsets->saved_regs_mask |= (1 << reg);
20698 offsets->locals_base = offsets->soft_frame + frame_size;
20699 offsets->outgoing_args = (offsets->locals_base
20700 + crtl->outgoing_args_size);
20702 if (ARM_DOUBLEWORD_ALIGN)
20704 /* Ensure SP remains doubleword aligned. */
20705 if (offsets->outgoing_args & 7)
20706 offsets->outgoing_args += 4;
20707 gcc_assert (!(offsets->outgoing_args & 7));
20710 return offsets;
20714 /* Calculate the relative offsets for the different stack pointers. Positive
20715 offsets are in the direction of stack growth. */
20717 HOST_WIDE_INT
20718 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20720 arm_stack_offsets *offsets;
20722 offsets = arm_get_frame_offsets ();
20724 /* OK, now we have enough information to compute the distances.
20725 There must be an entry in these switch tables for each pair
20726 of registers in ELIMINABLE_REGS, even if some of the entries
20727 seem to be redundant or useless. */
20728 switch (from)
20730 case ARG_POINTER_REGNUM:
20731 switch (to)
20733 case THUMB_HARD_FRAME_POINTER_REGNUM:
20734 return 0;
20736 case FRAME_POINTER_REGNUM:
20737 /* This is the reverse of the soft frame pointer
20738 to hard frame pointer elimination below. */
20739 return offsets->soft_frame - offsets->saved_args;
20741 case ARM_HARD_FRAME_POINTER_REGNUM:
20742 /* This is only non-zero in the case where the static chain register
20743 is stored above the frame. */
20744 return offsets->frame - offsets->saved_args - 4;
20746 case STACK_POINTER_REGNUM:
20747 /* If nothing has been pushed on the stack at all
20748 then this will return -4. This *is* correct! */
20749 return offsets->outgoing_args - (offsets->saved_args + 4);
20751 default:
20752 gcc_unreachable ();
20754 gcc_unreachable ();
20756 case FRAME_POINTER_REGNUM:
20757 switch (to)
20759 case THUMB_HARD_FRAME_POINTER_REGNUM:
20760 return 0;
20762 case ARM_HARD_FRAME_POINTER_REGNUM:
20763 /* The hard frame pointer points to the top entry in the
20764 stack frame. The soft frame pointer to the bottom entry
20765 in the stack frame. If there is no stack frame at all,
20766 then they are identical. */
20768 return offsets->frame - offsets->soft_frame;
20770 case STACK_POINTER_REGNUM:
20771 return offsets->outgoing_args - offsets->soft_frame;
20773 default:
20774 gcc_unreachable ();
20776 gcc_unreachable ();
20778 default:
20779 /* You cannot eliminate from the stack pointer.
20780 In theory you could eliminate from the hard frame
20781 pointer to the stack pointer, but this will never
20782 happen, since if a stack frame is not needed the
20783 hard frame pointer will never be used. */
20784 gcc_unreachable ();
20788 /* Given FROM and TO register numbers, say whether this elimination is
20789 allowed. Frame pointer elimination is automatically handled.
20791 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20792 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20793 pointer, we must eliminate FRAME_POINTER_REGNUM into
20794 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20795 ARG_POINTER_REGNUM. */
20797 bool
20798 arm_can_eliminate (const int from, const int to)
20800 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20801 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20802 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20803 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20804 true);
20807 /* Emit RTL to save coprocessor registers on function entry. Returns the
20808 number of bytes pushed. */
20810 static int
20811 arm_save_coproc_regs(void)
20813 int saved_size = 0;
20814 unsigned reg;
20815 unsigned start_reg;
20816 rtx insn;
20818 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20819 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20821 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20822 insn = gen_rtx_MEM (V2SImode, insn);
20823 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20824 RTX_FRAME_RELATED_P (insn) = 1;
20825 saved_size += 8;
20828 if (TARGET_HARD_FLOAT && TARGET_VFP)
20830 start_reg = FIRST_VFP_REGNUM;
20832 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20834 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20835 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20837 if (start_reg != reg)
20838 saved_size += vfp_emit_fstmd (start_reg,
20839 (reg - start_reg) / 2);
20840 start_reg = reg + 2;
20843 if (start_reg != reg)
20844 saved_size += vfp_emit_fstmd (start_reg,
20845 (reg - start_reg) / 2);
20847 return saved_size;
20851 /* Set the Thumb frame pointer from the stack pointer. */
20853 static void
20854 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20856 HOST_WIDE_INT amount;
20857 rtx insn, dwarf;
20859 amount = offsets->outgoing_args - offsets->locals_base;
20860 if (amount < 1024)
20861 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20862 stack_pointer_rtx, GEN_INT (amount)));
20863 else
20865 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20866 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20867 expects the first two operands to be the same. */
20868 if (TARGET_THUMB2)
20870 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20871 stack_pointer_rtx,
20872 hard_frame_pointer_rtx));
20874 else
20876 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20877 hard_frame_pointer_rtx,
20878 stack_pointer_rtx));
20880 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20881 plus_constant (Pmode, stack_pointer_rtx, amount));
20882 RTX_FRAME_RELATED_P (dwarf) = 1;
20883 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20886 RTX_FRAME_RELATED_P (insn) = 1;
20889 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20890 function. */
20891 void
20892 arm_expand_prologue (void)
20894 rtx amount;
20895 rtx insn;
20896 rtx ip_rtx;
20897 unsigned long live_regs_mask;
20898 unsigned long func_type;
20899 int fp_offset = 0;
20900 int saved_pretend_args = 0;
20901 int saved_regs = 0;
20902 unsigned HOST_WIDE_INT args_to_push;
20903 arm_stack_offsets *offsets;
20905 func_type = arm_current_func_type ();
20907 /* Naked functions don't have prologues. */
20908 if (IS_NAKED (func_type))
20909 return;
20911 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20912 args_to_push = crtl->args.pretend_args_size;
20914 /* Compute which registers we will have to save onto the stack. */
20915 offsets = arm_get_frame_offsets ();
20916 live_regs_mask = offsets->saved_regs_mask;
20918 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20920 if (IS_STACKALIGN (func_type))
20922 rtx r0, r1;
20924 /* Handle a word-aligned stack pointer. We generate the following:
20926 mov r0, sp
20927 bic r1, r0, #7
20928 mov sp, r1
20929 <save and restore r0 in normal prologue/epilogue>
20930 mov sp, r0
20931 bx lr
20933 The unwinder doesn't need to know about the stack realignment.
20934 Just tell it we saved SP in r0. */
20935 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20937 r0 = gen_rtx_REG (SImode, 0);
20938 r1 = gen_rtx_REG (SImode, 1);
20940 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20941 RTX_FRAME_RELATED_P (insn) = 1;
20942 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20944 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20946 /* ??? The CFA changes here, which may cause GDB to conclude that it
20947 has entered a different function. That said, the unwind info is
20948 correct, individually, before and after this instruction because
20949 we've described the save of SP, which will override the default
20950 handling of SP as restoring from the CFA. */
20951 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20954 /* For APCS frames, if the IP register is clobbered
20955 when creating the frame, save that register in a special
20956 way. */
20957 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20959 if (IS_INTERRUPT (func_type))
20961 /* Interrupt functions must not corrupt any registers.
20962 Creating a frame pointer however, corrupts the IP
20963 register, so we must push it first. */
20964 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20966 /* Do not set RTX_FRAME_RELATED_P on this insn.
20967 The dwarf stack unwinding code only wants to see one
20968 stack decrement per function, and this is not it. If
20969 this instruction is labeled as being part of the frame
20970 creation sequence then dwarf2out_frame_debug_expr will
20971 die when it encounters the assignment of IP to FP
20972 later on, since the use of SP here establishes SP as
20973 the CFA register and not IP.
20975 Anyway this instruction is not really part of the stack
20976 frame creation although it is part of the prologue. */
20978 else if (IS_NESTED (func_type))
20980 /* The static chain register is the same as the IP register
20981 used as a scratch register during stack frame creation.
20982 To get around this need to find somewhere to store IP
20983 whilst the frame is being created. We try the following
20984 places in order:
20986 1. The last argument register r3 if it is available.
20987 2. A slot on the stack above the frame if there are no
20988 arguments to push onto the stack.
20989 3. Register r3 again, after pushing the argument registers
20990 onto the stack, if this is a varargs function.
20991 4. The last slot on the stack created for the arguments to
20992 push, if this isn't a varargs function.
20994 Note - we only need to tell the dwarf2 backend about the SP
20995 adjustment in the second variant; the static chain register
20996 doesn't need to be unwound, as it doesn't contain a value
20997 inherited from the caller. */
20999 if (!arm_r3_live_at_start_p ())
21000 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21001 else if (args_to_push == 0)
21003 rtx addr, dwarf;
21005 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21006 saved_regs += 4;
21008 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21009 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21010 fp_offset = 4;
21012 /* Just tell the dwarf backend that we adjusted SP. */
21013 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21014 plus_constant (Pmode, stack_pointer_rtx,
21015 -fp_offset));
21016 RTX_FRAME_RELATED_P (insn) = 1;
21017 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21019 else
21021 /* Store the args on the stack. */
21022 if (cfun->machine->uses_anonymous_args)
21024 insn
21025 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21026 (0xf0 >> (args_to_push / 4)) & 0xf);
21027 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21028 saved_pretend_args = 1;
21030 else
21032 rtx addr, dwarf;
21034 if (args_to_push == 4)
21035 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21036 else
21037 addr
21038 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21039 plus_constant (Pmode,
21040 stack_pointer_rtx,
21041 -args_to_push));
21043 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21045 /* Just tell the dwarf backend that we adjusted SP. */
21046 dwarf
21047 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21048 plus_constant (Pmode, stack_pointer_rtx,
21049 -args_to_push));
21050 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21053 RTX_FRAME_RELATED_P (insn) = 1;
21054 fp_offset = args_to_push;
21055 args_to_push = 0;
21059 insn = emit_set_insn (ip_rtx,
21060 plus_constant (Pmode, stack_pointer_rtx,
21061 fp_offset));
21062 RTX_FRAME_RELATED_P (insn) = 1;
21065 if (args_to_push)
21067 /* Push the argument registers, or reserve space for them. */
21068 if (cfun->machine->uses_anonymous_args)
21069 insn = emit_multi_reg_push
21070 ((0xf0 >> (args_to_push / 4)) & 0xf,
21071 (0xf0 >> (args_to_push / 4)) & 0xf);
21072 else
21073 insn = emit_insn
21074 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21075 GEN_INT (- args_to_push)));
21076 RTX_FRAME_RELATED_P (insn) = 1;
21079 /* If this is an interrupt service routine, and the link register
21080 is going to be pushed, and we're not generating an extra
21081 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21082 subtracting four from LR now will mean that the function return
21083 can be done with a single instruction. */
21084 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21085 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21086 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21087 && TARGET_ARM)
21089 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21091 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21094 if (live_regs_mask)
21096 unsigned long dwarf_regs_mask = live_regs_mask;
21098 saved_regs += bit_count (live_regs_mask) * 4;
21099 if (optimize_size && !frame_pointer_needed
21100 && saved_regs == offsets->saved_regs - offsets->saved_args)
21102 /* If no coprocessor registers are being pushed and we don't have
21103 to worry about a frame pointer then push extra registers to
21104 create the stack frame. This is done in a way that does not
21105 alter the frame layout, so is independent of the epilogue. */
21106 int n;
21107 int frame;
21108 n = 0;
21109 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21110 n++;
21111 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21112 if (frame && n * 4 >= frame)
21114 n = frame / 4;
21115 live_regs_mask |= (1 << n) - 1;
21116 saved_regs += frame;
21120 if (TARGET_LDRD
21121 && current_tune->prefer_ldrd_strd
21122 && !optimize_function_for_size_p (cfun))
21124 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21125 if (TARGET_THUMB2)
21126 thumb2_emit_strd_push (live_regs_mask);
21127 else if (TARGET_ARM
21128 && !TARGET_APCS_FRAME
21129 && !IS_INTERRUPT (func_type))
21130 arm_emit_strd_push (live_regs_mask);
21131 else
21133 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21134 RTX_FRAME_RELATED_P (insn) = 1;
21137 else
21139 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21140 RTX_FRAME_RELATED_P (insn) = 1;
21144 if (! IS_VOLATILE (func_type))
21145 saved_regs += arm_save_coproc_regs ();
21147 if (frame_pointer_needed && TARGET_ARM)
21149 /* Create the new frame pointer. */
21150 if (TARGET_APCS_FRAME)
21152 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21153 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21154 RTX_FRAME_RELATED_P (insn) = 1;
21156 if (IS_NESTED (func_type))
21158 /* Recover the static chain register. */
21159 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21160 insn = gen_rtx_REG (SImode, 3);
21161 else
21163 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21164 insn = gen_frame_mem (SImode, insn);
21166 emit_set_insn (ip_rtx, insn);
21167 /* Add a USE to stop propagate_one_insn() from barfing. */
21168 emit_insn (gen_force_register_use (ip_rtx));
21171 else
21173 insn = GEN_INT (saved_regs - 4);
21174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21175 stack_pointer_rtx, insn));
21176 RTX_FRAME_RELATED_P (insn) = 1;
21180 if (flag_stack_usage_info)
21181 current_function_static_stack_size
21182 = offsets->outgoing_args - offsets->saved_args;
21184 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21186 /* This add can produce multiple insns for a large constant, so we
21187 need to get tricky. */
21188 rtx last = get_last_insn ();
21190 amount = GEN_INT (offsets->saved_args + saved_regs
21191 - offsets->outgoing_args);
21193 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21194 amount));
21197 last = last ? NEXT_INSN (last) : get_insns ();
21198 RTX_FRAME_RELATED_P (last) = 1;
21200 while (last != insn);
21202 /* If the frame pointer is needed, emit a special barrier that
21203 will prevent the scheduler from moving stores to the frame
21204 before the stack adjustment. */
21205 if (frame_pointer_needed)
21206 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21207 hard_frame_pointer_rtx));
21211 if (frame_pointer_needed && TARGET_THUMB2)
21212 thumb_set_frame_pointer (offsets);
21214 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21216 unsigned long mask;
21218 mask = live_regs_mask;
21219 mask &= THUMB2_WORK_REGS;
21220 if (!IS_NESTED (func_type))
21221 mask |= (1 << IP_REGNUM);
21222 arm_load_pic_register (mask);
21225 /* If we are profiling, make sure no instructions are scheduled before
21226 the call to mcount. Similarly if the user has requested no
21227 scheduling in the prolog. Similarly if we want non-call exceptions
21228 using the EABI unwinder, to prevent faulting instructions from being
21229 swapped with a stack adjustment. */
21230 if (crtl->profile || !TARGET_SCHED_PROLOG
21231 || (arm_except_unwind_info (&global_options) == UI_TARGET
21232 && cfun->can_throw_non_call_exceptions))
21233 emit_insn (gen_blockage ());
21235 /* If the link register is being kept alive, with the return address in it,
21236 then make sure that it does not get reused by the ce2 pass. */
21237 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21238 cfun->machine->lr_save_eliminated = 1;
21241 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21242 static void
21243 arm_print_condition (FILE *stream)
21245 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21247 /* Branch conversion is not implemented for Thumb-2. */
21248 if (TARGET_THUMB)
21250 output_operand_lossage ("predicated Thumb instruction");
21251 return;
21253 if (current_insn_predicate != NULL)
21255 output_operand_lossage
21256 ("predicated instruction in conditional sequence");
21257 return;
21260 fputs (arm_condition_codes[arm_current_cc], stream);
21262 else if (current_insn_predicate)
21264 enum arm_cond_code code;
21266 if (TARGET_THUMB1)
21268 output_operand_lossage ("predicated Thumb instruction");
21269 return;
21272 code = get_arm_condition_code (current_insn_predicate);
21273 fputs (arm_condition_codes[code], stream);
21278 /* If CODE is 'd', then X is a condition operand and the instruction
21279 should only be executed if the condition is true.
21280 If CODE is 'D', then X is a condition operand and the instruction
21281 should only be executed if the condition is false: however, if the mode
21282 of the comparison is CCFPEmode, then always execute the instruction -- we
21283 do this because in these circumstances !GE does not necessarily imply LT;
21284 in these cases the instruction pattern will take care to make sure that
21285 an instruction containing %d will follow, thereby undoing the effects of
21286 doing this instruction unconditionally.
21287 If CODE is 'N' then X is a floating point operand that must be negated
21288 before output.
21289 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21290 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
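/* A couple of concrete (illustrative) cases: with a CONST_INT of 0, %B
   prints -1 (the sign-extended bitwise inverse); with a DImode value held
   in r4, %M prints "{r4-r5}".  */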
21291 static void
21292 arm_print_operand (FILE *stream, rtx x, int code)
21294 switch (code)
21296 case '@':
21297 fputs (ASM_COMMENT_START, stream);
21298 return;
21300 case '_':
21301 fputs (user_label_prefix, stream);
21302 return;
21304 case '|':
21305 fputs (REGISTER_PREFIX, stream);
21306 return;
21308 case '?':
21309 arm_print_condition (stream);
21310 return;
21312 case '(':
21313 /* Nothing in unified syntax, otherwise the current condition code. */
21314 if (!TARGET_UNIFIED_ASM)
21315 arm_print_condition (stream);
21316 break;
21318 case ')':
21319 /* The current condition code in unified syntax, otherwise nothing. */
21320 if (TARGET_UNIFIED_ASM)
21321 arm_print_condition (stream);
21322 break;
21324 case '.':
21325 /* The current condition code for a condition code setting instruction.
21326 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21327 if (TARGET_UNIFIED_ASM)
21329 fputc('s', stream);
21330 arm_print_condition (stream);
21332 else
21334 arm_print_condition (stream);
21335 fputc('s', stream);
21337 return;
21339 case '!':
21340 /* If the instruction is conditionally executed then print
21341 the current condition code, otherwise print 's'. */
21342 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21343 if (current_insn_predicate)
21344 arm_print_condition (stream);
21345 else
21346 fputc('s', stream);
21347 break;
21349 /* %# is a "break" sequence. It doesn't output anything, but is used to
21350 separate e.g. operand numbers from following text, if that text consists
21351 of further digits which we don't want to be part of the operand
21352 number. */
21353 case '#':
21354 return;
21356 case 'N':
21358 REAL_VALUE_TYPE r;
21359 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21360 r = real_value_negate (&r);
21361 fprintf (stream, "%s", fp_const_from_val (&r));
21363 return;
21365 /* An integer or symbol address without a preceding # sign. */
21366 case 'c':
21367 switch (GET_CODE (x))
21369 case CONST_INT:
21370 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21371 break;
21373 case SYMBOL_REF:
21374 output_addr_const (stream, x);
21375 break;
21377 case CONST:
21378 if (GET_CODE (XEXP (x, 0)) == PLUS
21379 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21381 output_addr_const (stream, x);
21382 break;
21384 /* Fall through. */
21386 default:
21387 output_operand_lossage ("Unsupported operand for code '%c'", code);
21389 return;
21391 /* An integer that we want to print in HEX. */
21392 case 'x':
21393 switch (GET_CODE (x))
21395 case CONST_INT:
21396 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21397 break;
21399 default:
21400 output_operand_lossage ("Unsupported operand for code '%c'", code);
21402 return;
21404 case 'B':
21405 if (CONST_INT_P (x))
21407 HOST_WIDE_INT val;
21408 val = ARM_SIGN_EXTEND (~INTVAL (x));
21409 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21411 else
21413 putc ('~', stream);
21414 output_addr_const (stream, x);
21416 return;
21418 case 'L':
21419 /* The low 16 bits of an immediate constant. */
21420 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21421 return;
21423 case 'i':
21424 fprintf (stream, "%s", arithmetic_instr (x, 1));
21425 return;
21427 case 'I':
21428 fprintf (stream, "%s", arithmetic_instr (x, 0));
21429 return;
21431 case 'S':
21433 HOST_WIDE_INT val;
21434 const char *shift;
21436 shift = shift_op (x, &val);
21438 if (shift)
21440 fprintf (stream, ", %s ", shift);
21441 if (val == -1)
21442 arm_print_operand (stream, XEXP (x, 1), 0);
21443 else
21444 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21447 return;
21449 /* An explanation of the 'Q', 'R' and 'H' register operands:
21451 In a pair of registers containing a DI or DF value the 'Q'
21452 operand returns the register number of the register containing
21453 the least significant part of the value. The 'R' operand returns
21454 the register number of the register containing the most
21455 significant part of the value.
21457 The 'H' operand returns the higher of the two register numbers.
21458 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21459 same as the 'Q' operand, since the most significant part of the
21460 value is held in the lower number register. The reverse is true
21461 on systems where WORDS_BIG_ENDIAN is false.
21463 The purpose of these operands is to distinguish between cases
21464 where the endian-ness of the values is important (for example
21465 when they are added together), and cases where the endian-ness
21466 is irrelevant, but the order of register operations is important.
21467 For example when loading a value from memory into a register
21468 pair, the endian-ness does not matter. Provided that the value
21469 from the lower memory address is put into the lower numbered
21470 register, and the value from the higher address is put into the
21471 higher numbered register, the load will work regardless of whether
21472 the value being loaded is big-wordian or little-wordian. The
21473 order of the two register loads can matter however, if the address
21474 of the memory location is actually held in one of the registers
21475 being overwritten by the load.
21477 The 'Q' and 'R' constraints are also available for 64-bit
21478 constants. */
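/* Worked example (a sketch, not part of the original comment): with a
   DImode value held in the register pair r2/r3 on a target where
   WORDS_BIG_ENDIAN is false, the low word lives in r2, so '%Q' prints
   r2, '%R' prints r3 and '%H' prints r3.  Were WORDS_BIG_ENDIAN true,
   '%Q' would print r3 and '%R' r2, while '%H' would still print r3.  */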
21479 case 'Q':
21480 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21482 rtx part = gen_lowpart (SImode, x);
21483 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21484 return;
21487 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21489 output_operand_lossage ("invalid operand for code '%c'", code);
21490 return;
21493 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21494 return;
21496 case 'R':
21497 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21499 enum machine_mode mode = GET_MODE (x);
21500 rtx part;
21502 if (mode == VOIDmode)
21503 mode = DImode;
21504 part = gen_highpart_mode (SImode, mode, x);
21505 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21506 return;
21509 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21511 output_operand_lossage ("invalid operand for code '%c'", code);
21512 return;
21515 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21516 return;
21518 case 'H':
21519 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21521 output_operand_lossage ("invalid operand for code '%c'", code);
21522 return;
21525 asm_fprintf (stream, "%r", REGNO (x) + 1);
21526 return;
21528 case 'J':
21529 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21531 output_operand_lossage ("invalid operand for code '%c'", code);
21532 return;
21535 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21536 return;
21538 case 'K':
21539 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21541 output_operand_lossage ("invalid operand for code '%c'", code);
21542 return;
21545 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21546 return;
21548 case 'm':
21549 asm_fprintf (stream, "%r",
21550 REG_P (XEXP (x, 0))
21551 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21552 return;
21554 case 'M':
21555 asm_fprintf (stream, "{%r-%r}",
21556 REGNO (x),
21557 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21558 return;
21560 /* Like 'M', but writing doubleword vector registers, for use by Neon
21561 insns. */
21562 case 'h':
21564 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21565 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21566 if (numregs == 1)
21567 asm_fprintf (stream, "{d%d}", regno);
21568 else
21569 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21571 return;
21573 case 'd':
21574 /* CONST_TRUE_RTX means always -- that's the default. */
21575 if (x == const_true_rtx)
21576 return;
21578 if (!COMPARISON_P (x))
21580 output_operand_lossage ("invalid operand for code '%c'", code);
21581 return;
21584 fputs (arm_condition_codes[get_arm_condition_code (x)],
21585 stream);
21586 return;
21588 case 'D':
21589 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21590 want to do that. */
21591 if (x == const_true_rtx)
21593 output_operand_lossage ("instruction never executed");
21594 return;
21596 if (!COMPARISON_P (x))
21598 output_operand_lossage ("invalid operand for code '%c'", code);
21599 return;
21602 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21603 (get_arm_condition_code (x))],
21604 stream);
21605 return;
21607 case 's':
21608 case 'V':
21609 case 'W':
21610 case 'X':
21611 case 'Y':
21612 case 'Z':
21613 /* Former Maverick support, removed after GCC-4.7. */
21614 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21615 return;
21617 case 'U':
21618 if (!REG_P (x)
21619 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21620 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21621 /* Bad value for wCG register number. */
21623 output_operand_lossage ("invalid operand for code '%c'", code);
21624 return;
21627 else
21628 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21629 return;
21631 /* Print an iWMMXt control register name. */
21632 case 'w':
21633 if (!CONST_INT_P (x)
21634 || INTVAL (x) < 0
21635 || INTVAL (x) >= 16)
21636 /* Bad value for wC register number. */
21638 output_operand_lossage ("invalid operand for code '%c'", code);
21639 return;
21642 else
21644 static const char * wc_reg_names [16] =
21646 "wCID", "wCon", "wCSSF", "wCASF",
21647 "wC4", "wC5", "wC6", "wC7",
21648 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21649 "wC12", "wC13", "wC14", "wC15"
21652 fputs (wc_reg_names [INTVAL (x)], stream);
21654 return;
21656 /* Print the high single-precision register of a VFP double-precision
21657 register. */
21658 case 'p':
21660 enum machine_mode mode = GET_MODE (x);
21661 int regno;
21663 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21665 output_operand_lossage ("invalid operand for code '%c'", code);
21666 return;
21669 regno = REGNO (x);
21670 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21672 output_operand_lossage ("invalid operand for code '%c'", code);
21673 return;
21676 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21678 return;
21680 /* Print a VFP/Neon double precision or quad precision register name. */
21681 case 'P':
21682 case 'q':
21684 enum machine_mode mode = GET_MODE (x);
21685 int is_quad = (code == 'q');
21686 int regno;
21688 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21690 output_operand_lossage ("invalid operand for code '%c'", code);
21691 return;
21694 if (!REG_P (x)
21695 || !IS_VFP_REGNUM (REGNO (x)))
21697 output_operand_lossage ("invalid operand for code '%c'", code);
21698 return;
21701 regno = REGNO (x);
21702 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21703 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21705 output_operand_lossage ("invalid operand for code '%c'", code);
21706 return;
21709 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21710 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21712 return;
21714 /* These two codes print the low/high doubleword register of a Neon quad
21715 register, respectively. For pair-structure types, can also print
21716 low/high quadword registers. */
21717 case 'e':
21718 case 'f':
21720 enum machine_mode mode = GET_MODE (x);
21721 int regno;
21723 if ((GET_MODE_SIZE (mode) != 16
21724 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21726 output_operand_lossage ("invalid operand for code '%c'", code);
21727 return;
21730 regno = REGNO (x);
21731 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21733 output_operand_lossage ("invalid operand for code '%c'", code);
21734 return;
21737 if (GET_MODE_SIZE (mode) == 16)
21738 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21739 + (code == 'f' ? 1 : 0));
21740 else
21741 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21742 + (code == 'f' ? 1 : 0));
21744 return;
21746 /* Print a VFPv3 floating-point constant, represented as an integer
21747 index. */
21748 case 'G':
21750 int index = vfp3_const_double_index (x);
21751 gcc_assert (index != -1);
21752 fprintf (stream, "%d", index);
21754 return;
21756 /* Print bits representing opcode features for Neon.
21758 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21759 and polynomials as unsigned.
21761 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21763 Bit 2 is 1 for rounding functions, 0 otherwise. */
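/* Illustrative decoding of these bits (an informal sketch based on the
   lookup strings used below): bits & 3 == 0 selects 'u' (unsigned int),
   1 selects 's' (signed int), 2 selects 'p' (polynomial) and 3 selects
   'f' (float) for the 'T' code; the 'F' code maps both integer kinds to
   'i', and the 't' code maps polynomial to 'u'.  A value such as 5
   (signed, with rounding) prints 's' for '%T' and 'r' for '%O'.  */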
21765 /* Identify the type as 's', 'u', 'p' or 'f'. */
21766 case 'T':
21768 HOST_WIDE_INT bits = INTVAL (x);
21769 fputc ("uspf"[bits & 3], stream);
21771 return;
21773 /* Likewise, but signed and unsigned integers are both 'i'. */
21774 case 'F':
21776 HOST_WIDE_INT bits = INTVAL (x);
21777 fputc ("iipf"[bits & 3], stream);
21779 return;
21781 /* As for 'T', but emit 'u' instead of 'p'. */
21782 case 't':
21784 HOST_WIDE_INT bits = INTVAL (x);
21785 fputc ("usuf"[bits & 3], stream);
21787 return;
21789 /* Bit 2: rounding (vs none). */
21790 case 'O':
21792 HOST_WIDE_INT bits = INTVAL (x);
21793 fputs ((bits & 4) != 0 ? "r" : "", stream);
21795 return;
21797 /* Memory operand for vld1/vst1 instruction. */
21798 case 'A':
21800 rtx addr;
21801 bool postinc = FALSE;
21802 unsigned align, memsize, align_bits;
21804 gcc_assert (MEM_P (x));
21805 addr = XEXP (x, 0);
21806 if (GET_CODE (addr) == POST_INC)
21808 postinc = 1;
21809 addr = XEXP (addr, 0);
21811 asm_fprintf (stream, "[%r", REGNO (addr));
21813 /* We know the alignment of this access, so we can emit a hint in the
21814 instruction (for some alignments) as an aid to the memory subsystem
21815 of the target. */
21816 align = MEM_ALIGN (x) >> 3;
21817 memsize = MEM_SIZE (x);
21819 /* Only certain alignment specifiers are supported by the hardware. */
21820 if (memsize == 32 && (align % 32) == 0)
21821 align_bits = 256;
21822 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21823 align_bits = 128;
21824 else if (memsize >= 8 && (align % 8) == 0)
21825 align_bits = 64;
21826 else
21827 align_bits = 0;
21829 if (align_bits != 0)
21830 asm_fprintf (stream, ":%d", align_bits);
21832 asm_fprintf (stream, "]");
21834 if (postinc)
21835 fputs("!", stream);
21837 return;
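/* Example of the output produced above (a sketch, assuming a base
   register r0): a 16-byte access known to be 128-bit aligned with a
   POST_INC address prints "[r0:128]!", while an access whose alignment
   matches none of the supported hints prints just "[r0]".  */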
21839 case 'C':
21841 rtx addr;
21843 gcc_assert (MEM_P (x));
21844 addr = XEXP (x, 0);
21845 gcc_assert (REG_P (addr));
21846 asm_fprintf (stream, "[%r]", REGNO (addr));
21848 return;
21850 /* Translate an S register number into a D register number and element index. */
21851 case 'y':
21853 enum machine_mode mode = GET_MODE (x);
21854 int regno;
21856 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21862 regno = REGNO (x);
21863 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21869 regno = regno - FIRST_VFP_REGNUM;
21870 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21872 return;
21874 case 'v':
21875 gcc_assert (CONST_DOUBLE_P (x));
21876 int result;
21877 result = vfp3_const_double_for_fract_bits (x);
21878 if (result == 0)
21879 result = vfp3_const_double_for_bits (x);
21880 fprintf (stream, "#%d", result);
21881 return;
21883 /* Register specifier for vld1.16/vst1.16. Translate the S register
21884 number into a D register number and element index. */
21885 case 'z':
21887 enum machine_mode mode = GET_MODE (x);
21888 int regno;
21890 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21892 output_operand_lossage ("invalid operand for code '%c'", code);
21893 return;
21896 regno = REGNO (x);
21897 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21899 output_operand_lossage ("invalid operand for code '%c'", code);
21900 return;
21903 regno = regno - FIRST_VFP_REGNUM;
21904 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21906 return;
21908 default:
21909 if (x == 0)
21911 output_operand_lossage ("missing operand");
21912 return;
21915 switch (GET_CODE (x))
21917 case REG:
21918 asm_fprintf (stream, "%r", REGNO (x));
21919 break;
21921 case MEM:
21922 output_memory_reference_mode = GET_MODE (x);
21923 output_address (XEXP (x, 0));
21924 break;
21926 case CONST_DOUBLE:
21927 if (TARGET_NEON)
21929 char fpstr[20];
21930 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21931 sizeof (fpstr), 0, 1);
21932 fprintf (stream, "#%s", fpstr);
21934 else
21935 fprintf (stream, "#%s", fp_immediate_constant (x));
21936 break;
21938 default:
21939 gcc_assert (GET_CODE (x) != NEG);
21940 fputc ('#', stream);
21941 if (GET_CODE (x) == HIGH)
21943 fputs (":lower16:", stream);
21944 x = XEXP (x, 0);
21947 output_addr_const (stream, x);
21948 break;
21953 /* Target hook for printing a memory address. */
21954 static void
21955 arm_print_operand_address (FILE *stream, rtx x)
21957 if (TARGET_32BIT)
21959 int is_minus = GET_CODE (x) == MINUS;
21961 if (REG_P (x))
21962 asm_fprintf (stream, "[%r]", REGNO (x));
21963 else if (GET_CODE (x) == PLUS || is_minus)
21965 rtx base = XEXP (x, 0);
21966 rtx index = XEXP (x, 1);
21967 HOST_WIDE_INT offset = 0;
21968 if (!REG_P (base)
21969 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21971 /* Ensure that BASE is a register. */
21972 /* (one of them must be). */
21973 /* Also ensure the SP is not used as an index register. */

21974 rtx temp = base;
21975 base = index;
21976 index = temp;
21978 switch (GET_CODE (index))
21980 case CONST_INT:
21981 offset = INTVAL (index);
21982 if (is_minus)
21983 offset = -offset;
21984 asm_fprintf (stream, "[%r, #%wd]",
21985 REGNO (base), offset);
21986 break;
21988 case REG:
21989 asm_fprintf (stream, "[%r, %s%r]",
21990 REGNO (base), is_minus ? "-" : "",
21991 REGNO (index));
21992 break;
21994 case MULT:
21995 case ASHIFTRT:
21996 case LSHIFTRT:
21997 case ASHIFT:
21998 case ROTATERT:
22000 asm_fprintf (stream, "[%r, %s%r",
22001 REGNO (base), is_minus ? "-" : "",
22002 REGNO (XEXP (index, 0)));
22003 arm_print_operand (stream, index, 'S');
22004 fputs ("]", stream);
22005 break;
22008 default:
22009 gcc_unreachable ();
22012 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22013 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22015 extern enum machine_mode output_memory_reference_mode;
22017 gcc_assert (REG_P (XEXP (x, 0)));
22019 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22020 asm_fprintf (stream, "[%r, #%s%d]!",
22021 REGNO (XEXP (x, 0)),
22022 GET_CODE (x) == PRE_DEC ? "-" : "",
22023 GET_MODE_SIZE (output_memory_reference_mode));
22024 else
22025 asm_fprintf (stream, "[%r], #%s%d",
22026 REGNO (XEXP (x, 0)),
22027 GET_CODE (x) == POST_DEC ? "-" : "",
22028 GET_MODE_SIZE (output_memory_reference_mode));
22030 else if (GET_CODE (x) == PRE_MODIFY)
22032 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22033 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22034 asm_fprintf (stream, "#%wd]!",
22035 INTVAL (XEXP (XEXP (x, 1), 1)));
22036 else
22037 asm_fprintf (stream, "%r]!",
22038 REGNO (XEXP (XEXP (x, 1), 1)));
22040 else if (GET_CODE (x) == POST_MODIFY)
22042 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22043 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22044 asm_fprintf (stream, "#%wd",
22045 INTVAL (XEXP (XEXP (x, 1), 1)));
22046 else
22047 asm_fprintf (stream, "%r",
22048 REGNO (XEXP (XEXP (x, 1), 1)));
22050 else output_addr_const (stream, x);
22052 else
22054 if (REG_P (x))
22055 asm_fprintf (stream, "[%r]", REGNO (x));
22056 else if (GET_CODE (x) == POST_INC)
22057 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22058 else if (GET_CODE (x) == PLUS)
22060 gcc_assert (REG_P (XEXP (x, 0)));
22061 if (CONST_INT_P (XEXP (x, 1)))
22062 asm_fprintf (stream, "[%r, #%wd]",
22063 REGNO (XEXP (x, 0)),
22064 INTVAL (XEXP (x, 1)));
22065 else
22066 asm_fprintf (stream, "[%r, %r]",
22067 REGNO (XEXP (x, 0)),
22068 REGNO (XEXP (x, 1)));
22070 else
22071 output_addr_const (stream, x);
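/* Some example renderings from the 32-bit path above (an informal
   sketch; register names chosen arbitrarily, auto-modify forms shown
   for an SImode reference):

     (reg r4)                        -> [r4]
     (plus (reg r4) (const_int 8))   -> [r4, #8]
     (minus (reg r4) (reg r5))       -> [r4, -r5]
     (pre_dec (reg r4))              -> [r4, #-4]!
     (post_inc (reg r4))             -> [r4], #4  */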
22075 /* Target hook for indicating whether a punctuation character for
22076 TARGET_PRINT_OPERAND is valid. */
22077 static bool
22078 arm_print_operand_punct_valid_p (unsigned char code)
22080 return (code == '@' || code == '|' || code == '.'
22081 || code == '(' || code == ')' || code == '#'
22082 || (TARGET_32BIT && (code == '?'))
22083 || (TARGET_THUMB2 && (code == '!'))
22084 || (TARGET_THUMB && (code == '_')));
22087 /* Target hook for assembling integer objects. The ARM version needs to
22088 handle word-sized values specially. */
22089 static bool
22090 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22092 enum machine_mode mode;
22094 if (size == UNITS_PER_WORD && aligned_p)
22096 fputs ("\t.word\t", asm_out_file);
22097 output_addr_const (asm_out_file, x);
22099 /* Mark symbols as position independent. We only do this in the
22100 .text segment, not in the .data segment. */
22101 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22102 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22104 /* See legitimize_pic_address for an explanation of the
22105 TARGET_VXWORKS_RTP check. */
22106 if (!arm_pic_data_is_text_relative
22107 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22108 fputs ("(GOT)", asm_out_file);
22109 else
22110 fputs ("(GOTOFF)", asm_out_file);
22112 fputc ('\n', asm_out_file);
22113 return true;
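/* For instance (hypothetical symbol, not taken from the source):
   assembling the address of a global "foo" for a PIC constant table
   would emit "\t.word\tfoo(GOTOFF)" when the data is text-relative and
   foo binds locally, and "\t.word\tfoo(GOT)" otherwise.  */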
22116 mode = GET_MODE (x);
22118 if (arm_vector_mode_supported_p (mode))
22120 int i, units;
22122 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22124 units = CONST_VECTOR_NUNITS (x);
22125 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22127 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22128 for (i = 0; i < units; i++)
22130 rtx elt = CONST_VECTOR_ELT (x, i);
22131 assemble_integer
22132 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22134 else
22135 for (i = 0; i < units; i++)
22137 rtx elt = CONST_VECTOR_ELT (x, i);
22138 REAL_VALUE_TYPE rval;
22140 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22142 assemble_real
22143 (rval, GET_MODE_INNER (mode),
22144 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22147 return true;
22150 return default_assemble_integer (x, size, aligned_p);
22153 static void
22154 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22156 section *s;
22158 if (!TARGET_AAPCS_BASED)
22160 (is_ctor ?
22161 default_named_section_asm_out_constructor
22162 : default_named_section_asm_out_destructor) (symbol, priority);
22163 return;
22166 /* Put these in the .init_array section, using a special relocation. */
22167 if (priority != DEFAULT_INIT_PRIORITY)
22169 char buf[18];
22170 sprintf (buf, "%s.%.5u",
22171 is_ctor ? ".init_array" : ".fini_array",
22172 priority);
22173 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22175 else if (is_ctor)
22176 s = ctors_section;
22177 else
22178 s = dtors_section;
22180 switch_to_section (s);
22181 assemble_align (POINTER_SIZE);
22182 fputs ("\t.word\t", asm_out_file);
22183 output_addr_const (asm_out_file, symbol);
22184 fputs ("(target1)\n", asm_out_file);
22187 /* Add a function to the list of static constructors. */
22189 static void
22190 arm_elf_asm_constructor (rtx symbol, int priority)
22192 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22195 /* Add a function to the list of static destructors. */
22197 static void
22198 arm_elf_asm_destructor (rtx symbol, int priority)
22200 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22203 /* A finite state machine takes care of noticing whether or not instructions
22204 can be conditionally executed, and thus decrease execution time and code
22205 size by deleting branch instructions. The fsm is controlled by
22206 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22208 /* The states of the fsm controlling condition codes are:
22209 0: normal, do nothing special
22210 1: make ASM_OUTPUT_OPCODE not output this instruction
22211 2: make ASM_OUTPUT_OPCODE not output this instruction
22212 3: make instructions conditional
22213 4: make instructions conditional
22215 State transitions (state->state by whom under condition):
22216 0 -> 1 final_prescan_insn if the `target' is a label
22217 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22218 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22219 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22220 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22221 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22222 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22223 (the target insn is arm_target_insn).
22225 If the jump clobbers the conditions then we use states 2 and 4.
22227 A similar thing can be done with conditional return insns.
22229 XXX In case the `target' is an unconditional branch, this conditionalising
22230 of the instructions always reduces code size, but not always execution
22231 time. But then, I want to reduce the code size to somewhere near what
22232 /bin/cc produces. */
22234 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22235 instructions. When a COND_EXEC instruction is seen the subsequent
22236 instructions are scanned so that multiple conditional instructions can be
22237 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22238 specify the length and true/false mask for the IT block. These will be
22239 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
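/* For illustration (a sketch, not taken from any particular test case),
   a conditional branch around a single instruction such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery (in ARM state) as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch entirely.  */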
22241 /* Returns the index of the ARM condition code string in
22242 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22243 COMPARISON should be an rtx like `(eq (...) (...))'. */
22245 enum arm_cond_code
22246 maybe_get_arm_condition_code (rtx comparison)
22248 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22249 enum arm_cond_code code;
22250 enum rtx_code comp_code = GET_CODE (comparison);
22252 if (GET_MODE_CLASS (mode) != MODE_CC)
22253 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22254 XEXP (comparison, 1));
22256 switch (mode)
22258 case CC_DNEmode: code = ARM_NE; goto dominance;
22259 case CC_DEQmode: code = ARM_EQ; goto dominance;
22260 case CC_DGEmode: code = ARM_GE; goto dominance;
22261 case CC_DGTmode: code = ARM_GT; goto dominance;
22262 case CC_DLEmode: code = ARM_LE; goto dominance;
22263 case CC_DLTmode: code = ARM_LT; goto dominance;
22264 case CC_DGEUmode: code = ARM_CS; goto dominance;
22265 case CC_DGTUmode: code = ARM_HI; goto dominance;
22266 case CC_DLEUmode: code = ARM_LS; goto dominance;
22267 case CC_DLTUmode: code = ARM_CC;
22269 dominance:
22270 if (comp_code == EQ)
22271 return ARM_INVERSE_CONDITION_CODE (code);
22272 if (comp_code == NE)
22273 return code;
22274 return ARM_NV;
22276 case CC_NOOVmode:
22277 switch (comp_code)
22279 case NE: return ARM_NE;
22280 case EQ: return ARM_EQ;
22281 case GE: return ARM_PL;
22282 case LT: return ARM_MI;
22283 default: return ARM_NV;
22286 case CC_Zmode:
22287 switch (comp_code)
22289 case NE: return ARM_NE;
22290 case EQ: return ARM_EQ;
22291 default: return ARM_NV;
22294 case CC_Nmode:
22295 switch (comp_code)
22297 case NE: return ARM_MI;
22298 case EQ: return ARM_PL;
22299 default: return ARM_NV;
22302 case CCFPEmode:
22303 case CCFPmode:
22304 /* We can handle all cases except UNEQ and LTGT. */
22305 switch (comp_code)
22307 case GE: return ARM_GE;
22308 case GT: return ARM_GT;
22309 case LE: return ARM_LS;
22310 case LT: return ARM_MI;
22311 case NE: return ARM_NE;
22312 case EQ: return ARM_EQ;
22313 case ORDERED: return ARM_VC;
22314 case UNORDERED: return ARM_VS;
22315 case UNLT: return ARM_LT;
22316 case UNLE: return ARM_LE;
22317 case UNGT: return ARM_HI;
22318 case UNGE: return ARM_PL;
22319 /* UNEQ and LTGT do not have a representation. */
22320 case UNEQ: /* Fall through. */
22321 case LTGT: /* Fall through. */
22322 default: return ARM_NV;
22325 case CC_SWPmode:
22326 switch (comp_code)
22328 case NE: return ARM_NE;
22329 case EQ: return ARM_EQ;
22330 case GE: return ARM_LE;
22331 case GT: return ARM_LT;
22332 case LE: return ARM_GE;
22333 case LT: return ARM_GT;
22334 case GEU: return ARM_LS;
22335 case GTU: return ARM_CC;
22336 case LEU: return ARM_CS;
22337 case LTU: return ARM_HI;
22338 default: return ARM_NV;
22341 case CC_Cmode:
22342 switch (comp_code)
22344 case LTU: return ARM_CS;
22345 case GEU: return ARM_CC;
22346 default: return ARM_NV;
22349 case CC_CZmode:
22350 switch (comp_code)
22352 case NE: return ARM_NE;
22353 case EQ: return ARM_EQ;
22354 case GEU: return ARM_CS;
22355 case GTU: return ARM_HI;
22356 case LEU: return ARM_LS;
22357 case LTU: return ARM_CC;
22358 default: return ARM_NV;
22361 case CC_NCVmode:
22362 switch (comp_code)
22364 case GE: return ARM_GE;
22365 case LT: return ARM_LT;
22366 case GEU: return ARM_CS;
22367 case LTU: return ARM_CC;
22368 default: return ARM_NV;
22371 case CCmode:
22372 switch (comp_code)
22374 case NE: return ARM_NE;
22375 case EQ: return ARM_EQ;
22376 case GE: return ARM_GE;
22377 case GT: return ARM_GT;
22378 case LE: return ARM_LE;
22379 case LT: return ARM_LT;
22380 case GEU: return ARM_CS;
22381 case GTU: return ARM_HI;
22382 case LEU: return ARM_LS;
22383 case LTU: return ARM_CC;
22384 default: return ARM_NV;
22387 default: gcc_unreachable ();
22391 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22392 static enum arm_cond_code
22393 get_arm_condition_code (rtx comparison)
22395 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22396 gcc_assert (code != ARM_NV);
22397 return code;
22400 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22401 instructions. */
22402 void
22403 thumb2_final_prescan_insn (rtx insn)
22405 rtx first_insn = insn;
22406 rtx body = PATTERN (insn);
22407 rtx predicate;
22408 enum arm_cond_code code;
22409 int n;
22410 int mask;
22411 int max;
22413 /* max_insns_skipped in the tune was already taken into account in the
22414 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22415 just emit the IT blocks as best we can. It does not make sense to split
22416 the IT blocks. */
22417 max = MAX_INSN_PER_IT_BLOCK;
22419 /* Remove the previous insn from the count of insns to be output. */
22420 if (arm_condexec_count)
22421 arm_condexec_count--;
22423 /* Nothing to do if we are already inside a conditional block. */
22424 if (arm_condexec_count)
22425 return;
22427 if (GET_CODE (body) != COND_EXEC)
22428 return;
22430 /* Conditional jumps are implemented directly. */
22431 if (JUMP_P (insn))
22432 return;
22434 predicate = COND_EXEC_TEST (body);
22435 arm_current_cc = get_arm_condition_code (predicate);
22437 n = get_attr_ce_count (insn);
22438 arm_condexec_count = 1;
22439 arm_condexec_mask = (1 << n) - 1;
22440 arm_condexec_masklen = n;
22441 /* See if subsequent instructions can be combined into the same block. */
22442 for (;;)
22444 insn = next_nonnote_insn (insn);
22446 /* Jumping into the middle of an IT block is illegal, so a label or
22447 barrier terminates the block. */
22448 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22449 break;
22451 body = PATTERN (insn);
22452 /* USE and CLOBBER aren't really insns, so just skip them. */
22453 if (GET_CODE (body) == USE
22454 || GET_CODE (body) == CLOBBER)
22455 continue;
22457 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22458 if (GET_CODE (body) != COND_EXEC)
22459 break;
22460 /* Maximum number of conditionally executed instructions in a block. */
22461 n = get_attr_ce_count (insn);
22462 if (arm_condexec_masklen + n > max)
22463 break;
22465 predicate = COND_EXEC_TEST (body);
22466 code = get_arm_condition_code (predicate);
22467 mask = (1 << n) - 1;
22468 if (arm_current_cc == code)
22469 arm_condexec_mask |= (mask << arm_condexec_masklen);
22470 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22471 break;
22473 arm_condexec_count++;
22474 arm_condexec_masklen += n;
22476 /* A jump must be the last instruction in a conditional block. */
22477 if (JUMP_P (insn))
22478 break;
22480 /* Restore recog_data (getting the attributes of other insns can
22481 destroy this array, but final.c assumes that it remains intact
22482 across this call). */
22483 extract_constrain_insn_cached (first_insn);
22486 void
22487 arm_final_prescan_insn (rtx insn)
22489 /* BODY will hold the body of INSN. */
22490 rtx body = PATTERN (insn);
22492 /* This will be 1 if trying to repeat the trick, and things need to be
22493 reversed if it appears to fail. */
22494 int reverse = 0;
22496 /* If we start with a return insn, we only succeed if we find another one. */
22497 int seeking_return = 0;
22498 enum rtx_code return_code = UNKNOWN;
22500 /* START_INSN will hold the insn from where we start looking. This is the
22501 first insn after the following code_label if REVERSE is true. */
22502 rtx start_insn = insn;
22504 /* If in state 4, check if the target branch is reached, in order to
22505 change back to state 0. */
22506 if (arm_ccfsm_state == 4)
22508 if (insn == arm_target_insn)
22510 arm_target_insn = NULL;
22511 arm_ccfsm_state = 0;
22513 return;
22516 /* If in state 3, it is possible to repeat the trick, if this insn is an
22517 unconditional branch to a label, and immediately following this branch
22518 is the previous target label which is only used once, and the label this
22519 branch jumps to is not too far off. */
22520 if (arm_ccfsm_state == 3)
22522 if (simplejump_p (insn))
22524 start_insn = next_nonnote_insn (start_insn);
22525 if (BARRIER_P (start_insn))
22527 /* XXX Isn't this always a barrier? */
22528 start_insn = next_nonnote_insn (start_insn);
22530 if (LABEL_P (start_insn)
22531 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22532 && LABEL_NUSES (start_insn) == 1)
22533 reverse = TRUE;
22534 else
22535 return;
22537 else if (ANY_RETURN_P (body))
22539 start_insn = next_nonnote_insn (start_insn);
22540 if (BARRIER_P (start_insn))
22541 start_insn = next_nonnote_insn (start_insn);
22542 if (LABEL_P (start_insn)
22543 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22544 && LABEL_NUSES (start_insn) == 1)
22546 reverse = TRUE;
22547 seeking_return = 1;
22548 return_code = GET_CODE (body);
22550 else
22551 return;
22553 else
22554 return;
22557 gcc_assert (!arm_ccfsm_state || reverse);
22558 if (!JUMP_P (insn))
22559 return;
22561 /* This jump might be paralleled with a clobber of the condition codes;
22562 the jump should always come first. */
22563 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22564 body = XVECEXP (body, 0, 0);
22566 if (reverse
22567 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22568 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22570 int insns_skipped;
22571 int fail = FALSE, succeed = FALSE;
22572 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22573 int then_not_else = TRUE;
22574 rtx this_insn = start_insn, label = 0;
22576 /* Register the insn jumped to. */
22577 if (reverse)
22579 if (!seeking_return)
22580 label = XEXP (SET_SRC (body), 0);
22582 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22583 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22584 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22586 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22587 then_not_else = FALSE;
22589 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22591 seeking_return = 1;
22592 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22594 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22596 seeking_return = 1;
22597 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22598 then_not_else = FALSE;
22600 else
22601 gcc_unreachable ();
22603 /* See how many insns this branch skips, and what kind of insns. If all
22604 insns are okay, and the label or unconditional branch to the same
22605 label is not too far away, succeed. */
22606 for (insns_skipped = 0;
22607 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22609 rtx scanbody;
22611 this_insn = next_nonnote_insn (this_insn);
22612 if (!this_insn)
22613 break;
22615 switch (GET_CODE (this_insn))
22617 case CODE_LABEL:
22618 /* Succeed if it is the target label, otherwise fail since
22619 control falls in from somewhere else. */
22620 if (this_insn == label)
22622 arm_ccfsm_state = 1;
22623 succeed = TRUE;
22625 else
22626 fail = TRUE;
22627 break;
22629 case BARRIER:
22630 /* Succeed if the following insn is the target label.
22631 Otherwise fail.
22632 If return insns are used then the last insn in a function
22633 will be a barrier. */
22634 this_insn = next_nonnote_insn (this_insn);
22635 if (this_insn && this_insn == label)
22637 arm_ccfsm_state = 1;
22638 succeed = TRUE;
22640 else
22641 fail = TRUE;
22642 break;
22644 case CALL_INSN:
22645 /* The AAPCS says that conditional calls should not be
22646 used since they make interworking inefficient (the
22647 linker can't transform BL<cond> into BLX). That's
22648 only a problem if the machine has BLX. */
22649 if (arm_arch5)
22651 fail = TRUE;
22652 break;
22655 /* Succeed if the following insn is the target label, or
22656 if the following two insns are a barrier and the
22657 target label. */
22658 this_insn = next_nonnote_insn (this_insn);
22659 if (this_insn && BARRIER_P (this_insn))
22660 this_insn = next_nonnote_insn (this_insn);
22662 if (this_insn && this_insn == label
22663 && insns_skipped < max_insns_skipped)
22665 arm_ccfsm_state = 1;
22666 succeed = TRUE;
22668 else
22669 fail = TRUE;
22670 break;
22672 case JUMP_INSN:
22673 /* If this is an unconditional branch to the same label, succeed.
22674 If it is to another label, do nothing. If it is conditional,
22675 fail. */
22676 /* XXX Probably, the tests for SET and the PC are
22677 unnecessary. */
22679 scanbody = PATTERN (this_insn);
22680 if (GET_CODE (scanbody) == SET
22681 && GET_CODE (SET_DEST (scanbody)) == PC)
22683 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22684 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22686 arm_ccfsm_state = 2;
22687 succeed = TRUE;
22689 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22690 fail = TRUE;
22692 /* Fail if a conditional return is undesirable (e.g. on a
22693 StrongARM), but still allow this if optimizing for size. */
22694 else if (GET_CODE (scanbody) == return_code
22695 && !use_return_insn (TRUE, NULL)
22696 && !optimize_size)
22697 fail = TRUE;
22698 else if (GET_CODE (scanbody) == return_code)
22700 arm_ccfsm_state = 2;
22701 succeed = TRUE;
22703 else if (GET_CODE (scanbody) == PARALLEL)
22705 switch (get_attr_conds (this_insn))
22707 case CONDS_NOCOND:
22708 break;
22709 default:
22710 fail = TRUE;
22711 break;
22714 else
22715 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22717 break;
22719 case INSN:
22720 /* Instructions using or affecting the condition codes make it
22721 fail. */
22722 scanbody = PATTERN (this_insn);
22723 if (!(GET_CODE (scanbody) == SET
22724 || GET_CODE (scanbody) == PARALLEL)
22725 || get_attr_conds (this_insn) != CONDS_NOCOND)
22726 fail = TRUE;
22727 break;
22729 default:
22730 break;
22733 if (succeed)
22735 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22736 arm_target_label = CODE_LABEL_NUMBER (label);
22737 else
22739 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22741 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22743 this_insn = next_nonnote_insn (this_insn);
22744 gcc_assert (!this_insn
22745 || (!BARRIER_P (this_insn)
22746 && !LABEL_P (this_insn)));
22748 if (!this_insn)
22750 /* Oh, dear! We ran off the end... give up. */
22751 extract_constrain_insn_cached (insn);
22752 arm_ccfsm_state = 0;
22753 arm_target_insn = NULL;
22754 return;
22756 arm_target_insn = this_insn;
22759 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22760 what it was. */
22761 if (!reverse)
22762 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22764 if (reverse || then_not_else)
22765 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22768 /* Restore recog_data (getting the attributes of other insns can
22769 destroy this array, but final.c assumes that it remains intact
22770 across this call). */
22771 extract_constrain_insn_cached (insn);
22775 /* Output IT instructions. */
22776 void
22777 thumb2_asm_output_opcode (FILE * stream)
22779 char buff[5];
22780 int n;
22782 if (arm_condexec_mask)
22784 for (n = 0; n < arm_condexec_masklen; n++)
22785 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22786 buff[n] = 0;
22787 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22788 arm_condition_codes[arm_current_cc]);
22789 arm_condexec_mask = 0;
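/* Example (a sketch): with arm_condexec_mask == 0x3, arm_condexec_masklen
   == 2 and arm_current_cc == ARM_EQ, the code above prints "itt\teq"
   before the first of the two conditionally executed instructions.  */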
22793 /* Returns true if REGNO is a valid register
22794 for holding a quantity of type MODE. */
22796 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22798 if (GET_MODE_CLASS (mode) == MODE_CC)
22799 return (regno == CC_REGNUM
22800 || (TARGET_HARD_FLOAT && TARGET_VFP
22801 && regno == VFPCC_REGNUM));
22803 if (TARGET_THUMB1)
22804 /* For the Thumb we only allow values bigger than SImode in
22805 registers 0 - 6, so that there is always a second low
22806 register available to hold the upper part of the value.
22807 We probably ought to ensure that the register is the
22808 start of an even numbered register pair. */
22809 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
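/* For example (assuming LAST_LO_REGNUM corresponds to r7): a DImode
   value is accepted starting at r6, where it occupies the low registers
   r6 and r7, but rejected starting at r7, where its upper half would
   spill into a high register.  */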
22811 if (TARGET_HARD_FLOAT && TARGET_VFP
22812 && IS_VFP_REGNUM (regno))
22814 if (mode == SFmode || mode == SImode)
22815 return VFP_REGNO_OK_FOR_SINGLE (regno);
22817 if (mode == DFmode)
22818 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22820 /* VFP registers can hold HFmode values, but there is no point in
22821 putting them there unless we have hardware conversion insns. */
22822 if (mode == HFmode)
22823 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22825 if (TARGET_NEON)
22826 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22827 || (VALID_NEON_QREG_MODE (mode)
22828 && NEON_REGNO_OK_FOR_QUAD (regno))
22829 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22830 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22831 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22832 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22833 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22835 return FALSE;
22838 if (TARGET_REALLY_IWMMXT)
22840 if (IS_IWMMXT_GR_REGNUM (regno))
22841 return mode == SImode;
22843 if (IS_IWMMXT_REGNUM (regno))
22844 return VALID_IWMMXT_REG_MODE (mode);
22847 /* We allow almost any value to be stored in the general registers.
22848 Restrict doubleword quantities to even register pairs in ARM state
22849 so that we can use ldrd. Do not allow very large Neon structure
22850 opaque modes in general registers; they would use too many. */
22851 if (regno <= LAST_ARM_REGNUM)
22853 if (ARM_NUM_REGS (mode) > 4)
22854 return FALSE;
22856 if (TARGET_THUMB2)
22857 return TRUE;
22859 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22862 if (regno == FRAME_POINTER_REGNUM
22863 || regno == ARG_POINTER_REGNUM)
22864 /* We only allow integers in the fake hard registers. */
22865 return GET_MODE_CLASS (mode) == MODE_INT;
22867 return FALSE;
22870 /* Implement MODES_TIEABLE_P. */
22872 bool
22873 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22875 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22876 return true;
22878 /* We specifically want to allow elements of "structure" modes to
22879 be tieable to the structure. This more general condition allows
22880 other rarer situations too. */
22881 if (TARGET_NEON
22882 && (VALID_NEON_DREG_MODE (mode1)
22883 || VALID_NEON_QREG_MODE (mode1)
22884 || VALID_NEON_STRUCT_MODE (mode1))
22885 && (VALID_NEON_DREG_MODE (mode2)
22886 || VALID_NEON_QREG_MODE (mode2)
22887 || VALID_NEON_STRUCT_MODE (mode2)))
22888 return true;
22890 return false;
22893 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22894 not used in arm mode. */
22896 enum reg_class
22897 arm_regno_class (int regno)
22899 if (TARGET_THUMB1)
22901 if (regno == STACK_POINTER_REGNUM)
22902 return STACK_REG;
22903 if (regno == CC_REGNUM)
22904 return CC_REG;
22905 if (regno < 8)
22906 return LO_REGS;
22907 return HI_REGS;
22910 if (TARGET_THUMB2 && regno < 8)
22911 return LO_REGS;
22913 if ( regno <= LAST_ARM_REGNUM
22914 || regno == FRAME_POINTER_REGNUM
22915 || regno == ARG_POINTER_REGNUM)
22916 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22918 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22919 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22921 if (IS_VFP_REGNUM (regno))
22923 if (regno <= D7_VFP_REGNUM)
22924 return VFP_D0_D7_REGS;
22925 else if (regno <= LAST_LO_VFP_REGNUM)
22926 return VFP_LO_REGS;
22927 else
22928 return VFP_HI_REGS;
22931 if (IS_IWMMXT_REGNUM (regno))
22932 return IWMMXT_REGS;
22934 if (IS_IWMMXT_GR_REGNUM (regno))
22935 return IWMMXT_GR_REGS;
22937 return NO_REGS;
22940 /* Handle a special case when computing the offset
22941 of an argument from the frame pointer. */
22943 arm_debugger_arg_offset (int value, rtx addr)
22945 rtx insn;
22947 /* We are only interested if dbxout_parms() failed to compute the offset. */
22948 if (value != 0)
22949 return 0;
22951 /* We can only cope with the case where the address is held in a register. */
22952 if (!REG_P (addr))
22953 return 0;
22955 /* If we are using the frame pointer to point at the argument, then
22956 an offset of 0 is correct. */
22957 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22958 return 0;
22960 /* If we are using the stack pointer to point at the
22961 argument, then an offset of 0 is correct. */
22962 /* ??? Check this is consistent with thumb2 frame layout. */
22963 if ((TARGET_THUMB || !frame_pointer_needed)
22964 && REGNO (addr) == SP_REGNUM)
22965 return 0;
22967 /* Oh dear. The argument is pointed to by a register rather
22968 than being held in a register, or being stored at a known
22969 offset from the frame pointer. Since GDB only understands
22970 those two kinds of argument we must translate the address
22971 held in the register into an offset from the frame pointer.
22972 We do this by searching through the insns for the function
22973 looking to see where this register gets its value. If the
22974 register is initialized from the frame pointer plus an offset
22975 then we are in luck and we can continue, otherwise we give up.
22977 This code is exercised by producing debugging information
22978 for a function with arguments like this:
22980 double func (double a, double b, int c, double d) {return d;}
22982 Without this code the stab for parameter 'd' will be set to
22983 an offset of 0 from the frame pointer, rather than 8. */
22985 /* The if() statement says:
22987 If the insn is a normal instruction
22988 and if the insn is setting the value in a register
22989 and if the register being set is the register holding the address of the argument
22990 and if the address is computed by an addition
22991 that involves adding to a register
22992 which is the frame pointer
22993 a constant integer
22995 then... */
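/* In RTL terms the loop below is looking for an insn of roughly this
   shape (an illustrative sketch, not a literal match pattern):

     (set (reg Rn)
	  (plus (reg hard-frame-pointer) (const_int 8)))

   where Rn is the register holding the argument's address; the
   const_int then becomes the offset reported to the debugger.  */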
22997 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22999 if ( NONJUMP_INSN_P (insn)
23000 && GET_CODE (PATTERN (insn)) == SET
23001 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23002 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23003 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23004 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23005 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23008 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23010 break;
23014 if (value == 0)
23016 debug_rtx (addr);
23017 warning (0, "unable to compute real location of stacked parameter");
23018 value = 8; /* XXX magic hack */
23021 return value;
23024 typedef enum {
23025 T_V8QI,
23026 T_V4HI,
23027 T_V4HF,
23028 T_V2SI,
23029 T_V2SF,
23030 T_DI,
23031 T_V16QI,
23032 T_V8HI,
23033 T_V4SI,
23034 T_V4SF,
23035 T_V2DI,
23036 T_TI,
23037 T_EI,
23038 T_OI,
23039 T_MAX /* Size of enum. Keep last. */
23040 } neon_builtin_type_mode;
23042 #define TYPE_MODE_BIT(X) (1 << (X))
23044 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23045 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23046 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23047 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23048 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23049 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23051 #define v8qi_UP T_V8QI
23052 #define v4hi_UP T_V4HI
23053 #define v4hf_UP T_V4HF
23054 #define v2si_UP T_V2SI
23055 #define v2sf_UP T_V2SF
23056 #define di_UP T_DI
23057 #define v16qi_UP T_V16QI
23058 #define v8hi_UP T_V8HI
23059 #define v4si_UP T_V4SI
23060 #define v4sf_UP T_V4SF
23061 #define v2di_UP T_V2DI
23062 #define ti_UP T_TI
23063 #define ei_UP T_EI
23064 #define oi_UP T_OI
23066 #define UP(X) X##_UP
23068 typedef enum {
23069 NEON_BINOP,
23070 NEON_TERNOP,
23071 NEON_UNOP,
23072 NEON_GETLANE,
23073 NEON_SETLANE,
23074 NEON_CREATE,
23075 NEON_RINT,
23076 NEON_DUP,
23077 NEON_DUPLANE,
23078 NEON_COMBINE,
23079 NEON_SPLIT,
23080 NEON_LANEMUL,
23081 NEON_LANEMULL,
23082 NEON_LANEMULH,
23083 NEON_LANEMAC,
23084 NEON_SCALARMUL,
23085 NEON_SCALARMULL,
23086 NEON_SCALARMULH,
23087 NEON_SCALARMAC,
23088 NEON_CONVERT,
23089 NEON_FLOAT_WIDEN,
23090 NEON_FLOAT_NARROW,
23091 NEON_FIXCONV,
23092 NEON_SELECT,
23093 NEON_RESULTPAIR,
23094 NEON_REINTERP,
23095 NEON_VTBL,
23096 NEON_VTBX,
23097 NEON_LOAD1,
23098 NEON_LOAD1LANE,
23099 NEON_STORE1,
23100 NEON_STORE1LANE,
23101 NEON_LOADSTRUCT,
23102 NEON_LOADSTRUCTLANE,
23103 NEON_STORESTRUCT,
23104 NEON_STORESTRUCTLANE,
23105 NEON_LOGICBINOP,
23106 NEON_SHIFTINSERT,
23107 NEON_SHIFTIMM,
23108 NEON_SHIFTACC
23109 } neon_itype;
23111 typedef struct {
23112 const char *name;
23113 const neon_itype itype;
23114 const neon_builtin_type_mode mode;
23115 const enum insn_code code;
23116 unsigned int fcode;
23117 } neon_builtin_datum;
23119 #define CF(N,X) CODE_FOR_neon_##N##X
23121 #define VAR1(T, N, A) \
23122 {#N, NEON_##T, UP (A), CF (N, A), 0}
23123 #define VAR2(T, N, A, B) \
23124 VAR1 (T, N, A), \
23125 {#N, NEON_##T, UP (B), CF (N, B), 0}
23126 #define VAR3(T, N, A, B, C) \
23127 VAR2 (T, N, A, B), \
23128 {#N, NEON_##T, UP (C), CF (N, C), 0}
23129 #define VAR4(T, N, A, B, C, D) \
23130 VAR3 (T, N, A, B, C), \
23131 {#N, NEON_##T, UP (D), CF (N, D), 0}
23132 #define VAR5(T, N, A, B, C, D, E) \
23133 VAR4 (T, N, A, B, C, D), \
23134 {#N, NEON_##T, UP (E), CF (N, E), 0}
23135 #define VAR6(T, N, A, B, C, D, E, F) \
23136 VAR5 (T, N, A, B, C, D, E), \
23137 {#N, NEON_##T, UP (F), CF (N, F), 0}
23138 #define VAR7(T, N, A, B, C, D, E, F, G) \
23139 VAR6 (T, N, A, B, C, D, E, F), \
23140 {#N, NEON_##T, UP (G), CF (N, G), 0}
23141 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23142 VAR7 (T, N, A, B, C, D, E, F, G), \
23143 {#N, NEON_##T, UP (H), CF (N, H), 0}
23144 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23145 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23146 {#N, NEON_##T, UP (I), CF (N, I), 0}
23147 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23148 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23149 {#N, NEON_##T, UP (J), CF (N, J), 0}
23151 /* The NEON builtin data can be found in arm_neon_builtins.def.
23152 The mode entries in the following table correspond to the "key" type of the
23153 instruction variant, i.e. equivalent to that which would be specified after
23154 the assembler mnemonic, which usually refers to the last vector operand.
23155 (Signed/unsigned/polynomial types are not differentiated between though, and
23156 are all mapped onto the same mode for a given element size.) The modes
23157 listed per instruction should be the same as those defined for that
23158 instruction's pattern in neon.md. */
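/* For example, a (hypothetical) entry such as

     VAR2 (BINOP, vadd, v8qi, v16qi)

   would expand, via the macros above, into two table entries:

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}

   with the fcode field presumably filled in later, when the builtins
   are registered.  */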
23160 static neon_builtin_datum neon_builtin_data[] =
23162 #include "arm_neon_builtins.def"
23165 #undef CF
23166 #undef VAR1
23167 #undef VAR2
23168 #undef VAR3
23169 #undef VAR4
23170 #undef VAR5
23171 #undef VAR6
23172 #undef VAR7
23173 #undef VAR8
23174 #undef VAR9
23175 #undef VAR10
23177 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23178 #define VAR1(T, N, A) \
23179 CF (N, A)
23180 #define VAR2(T, N, A, B) \
23181 VAR1 (T, N, A), \
23182 CF (N, B)
23183 #define VAR3(T, N, A, B, C) \
23184 VAR2 (T, N, A, B), \
23185 CF (N, C)
23186 #define VAR4(T, N, A, B, C, D) \
23187 VAR3 (T, N, A, B, C), \
23188 CF (N, D)
23189 #define VAR5(T, N, A, B, C, D, E) \
23190 VAR4 (T, N, A, B, C, D), \
23191 CF (N, E)
23192 #define VAR6(T, N, A, B, C, D, E, F) \
23193 VAR5 (T, N, A, B, C, D, E), \
23194 CF (N, F)
23195 #define VAR7(T, N, A, B, C, D, E, F, G) \
23196 VAR6 (T, N, A, B, C, D, E, F), \
23197 CF (N, G)
23198 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23199 VAR7 (T, N, A, B, C, D, E, F, G), \
23200 CF (N, H)
23201 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23202 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23203 CF (N, I)
23204 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23205 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23206 CF (N, J)
23207 enum arm_builtins
23209 ARM_BUILTIN_GETWCGR0,
23210 ARM_BUILTIN_GETWCGR1,
23211 ARM_BUILTIN_GETWCGR2,
23212 ARM_BUILTIN_GETWCGR3,
23214 ARM_BUILTIN_SETWCGR0,
23215 ARM_BUILTIN_SETWCGR1,
23216 ARM_BUILTIN_SETWCGR2,
23217 ARM_BUILTIN_SETWCGR3,
23219 ARM_BUILTIN_WZERO,
23221 ARM_BUILTIN_WAVG2BR,
23222 ARM_BUILTIN_WAVG2HR,
23223 ARM_BUILTIN_WAVG2B,
23224 ARM_BUILTIN_WAVG2H,
23226 ARM_BUILTIN_WACCB,
23227 ARM_BUILTIN_WACCH,
23228 ARM_BUILTIN_WACCW,
23230 ARM_BUILTIN_WMACS,
23231 ARM_BUILTIN_WMACSZ,
23232 ARM_BUILTIN_WMACU,
23233 ARM_BUILTIN_WMACUZ,
23235 ARM_BUILTIN_WSADB,
23236 ARM_BUILTIN_WSADBZ,
23237 ARM_BUILTIN_WSADH,
23238 ARM_BUILTIN_WSADHZ,
23240 ARM_BUILTIN_WALIGNI,
23241 ARM_BUILTIN_WALIGNR0,
23242 ARM_BUILTIN_WALIGNR1,
23243 ARM_BUILTIN_WALIGNR2,
23244 ARM_BUILTIN_WALIGNR3,
23246 ARM_BUILTIN_TMIA,
23247 ARM_BUILTIN_TMIAPH,
23248 ARM_BUILTIN_TMIABB,
23249 ARM_BUILTIN_TMIABT,
23250 ARM_BUILTIN_TMIATB,
23251 ARM_BUILTIN_TMIATT,
23253 ARM_BUILTIN_TMOVMSKB,
23254 ARM_BUILTIN_TMOVMSKH,
23255 ARM_BUILTIN_TMOVMSKW,
23257 ARM_BUILTIN_TBCSTB,
23258 ARM_BUILTIN_TBCSTH,
23259 ARM_BUILTIN_TBCSTW,
23261 ARM_BUILTIN_WMADDS,
23262 ARM_BUILTIN_WMADDU,
23264 ARM_BUILTIN_WPACKHSS,
23265 ARM_BUILTIN_WPACKWSS,
23266 ARM_BUILTIN_WPACKDSS,
23267 ARM_BUILTIN_WPACKHUS,
23268 ARM_BUILTIN_WPACKWUS,
23269 ARM_BUILTIN_WPACKDUS,
23271 ARM_BUILTIN_WADDB,
23272 ARM_BUILTIN_WADDH,
23273 ARM_BUILTIN_WADDW,
23274 ARM_BUILTIN_WADDSSB,
23275 ARM_BUILTIN_WADDSSH,
23276 ARM_BUILTIN_WADDSSW,
23277 ARM_BUILTIN_WADDUSB,
23278 ARM_BUILTIN_WADDUSH,
23279 ARM_BUILTIN_WADDUSW,
23280 ARM_BUILTIN_WSUBB,
23281 ARM_BUILTIN_WSUBH,
23282 ARM_BUILTIN_WSUBW,
23283 ARM_BUILTIN_WSUBSSB,
23284 ARM_BUILTIN_WSUBSSH,
23285 ARM_BUILTIN_WSUBSSW,
23286 ARM_BUILTIN_WSUBUSB,
23287 ARM_BUILTIN_WSUBUSH,
23288 ARM_BUILTIN_WSUBUSW,
23290 ARM_BUILTIN_WAND,
23291 ARM_BUILTIN_WANDN,
23292 ARM_BUILTIN_WOR,
23293 ARM_BUILTIN_WXOR,
23295 ARM_BUILTIN_WCMPEQB,
23296 ARM_BUILTIN_WCMPEQH,
23297 ARM_BUILTIN_WCMPEQW,
23298 ARM_BUILTIN_WCMPGTUB,
23299 ARM_BUILTIN_WCMPGTUH,
23300 ARM_BUILTIN_WCMPGTUW,
23301 ARM_BUILTIN_WCMPGTSB,
23302 ARM_BUILTIN_WCMPGTSH,
23303 ARM_BUILTIN_WCMPGTSW,
23305 ARM_BUILTIN_TEXTRMSB,
23306 ARM_BUILTIN_TEXTRMSH,
23307 ARM_BUILTIN_TEXTRMSW,
23308 ARM_BUILTIN_TEXTRMUB,
23309 ARM_BUILTIN_TEXTRMUH,
23310 ARM_BUILTIN_TEXTRMUW,
23311 ARM_BUILTIN_TINSRB,
23312 ARM_BUILTIN_TINSRH,
23313 ARM_BUILTIN_TINSRW,
23315 ARM_BUILTIN_WMAXSW,
23316 ARM_BUILTIN_WMAXSH,
23317 ARM_BUILTIN_WMAXSB,
23318 ARM_BUILTIN_WMAXUW,
23319 ARM_BUILTIN_WMAXUH,
23320 ARM_BUILTIN_WMAXUB,
23321 ARM_BUILTIN_WMINSW,
23322 ARM_BUILTIN_WMINSH,
23323 ARM_BUILTIN_WMINSB,
23324 ARM_BUILTIN_WMINUW,
23325 ARM_BUILTIN_WMINUH,
23326 ARM_BUILTIN_WMINUB,
23328 ARM_BUILTIN_WMULUM,
23329 ARM_BUILTIN_WMULSM,
23330 ARM_BUILTIN_WMULUL,
23332 ARM_BUILTIN_PSADBH,
23333 ARM_BUILTIN_WSHUFH,
23335 ARM_BUILTIN_WSLLH,
23336 ARM_BUILTIN_WSLLW,
23337 ARM_BUILTIN_WSLLD,
23338 ARM_BUILTIN_WSRAH,
23339 ARM_BUILTIN_WSRAW,
23340 ARM_BUILTIN_WSRAD,
23341 ARM_BUILTIN_WSRLH,
23342 ARM_BUILTIN_WSRLW,
23343 ARM_BUILTIN_WSRLD,
23344 ARM_BUILTIN_WRORH,
23345 ARM_BUILTIN_WRORW,
23346 ARM_BUILTIN_WRORD,
23347 ARM_BUILTIN_WSLLHI,
23348 ARM_BUILTIN_WSLLWI,
23349 ARM_BUILTIN_WSLLDI,
23350 ARM_BUILTIN_WSRAHI,
23351 ARM_BUILTIN_WSRAWI,
23352 ARM_BUILTIN_WSRADI,
23353 ARM_BUILTIN_WSRLHI,
23354 ARM_BUILTIN_WSRLWI,
23355 ARM_BUILTIN_WSRLDI,
23356 ARM_BUILTIN_WRORHI,
23357 ARM_BUILTIN_WRORWI,
23358 ARM_BUILTIN_WRORDI,
23360 ARM_BUILTIN_WUNPCKIHB,
23361 ARM_BUILTIN_WUNPCKIHH,
23362 ARM_BUILTIN_WUNPCKIHW,
23363 ARM_BUILTIN_WUNPCKILB,
23364 ARM_BUILTIN_WUNPCKILH,
23365 ARM_BUILTIN_WUNPCKILW,
23367 ARM_BUILTIN_WUNPCKEHSB,
23368 ARM_BUILTIN_WUNPCKEHSH,
23369 ARM_BUILTIN_WUNPCKEHSW,
23370 ARM_BUILTIN_WUNPCKEHUB,
23371 ARM_BUILTIN_WUNPCKEHUH,
23372 ARM_BUILTIN_WUNPCKEHUW,
23373 ARM_BUILTIN_WUNPCKELSB,
23374 ARM_BUILTIN_WUNPCKELSH,
23375 ARM_BUILTIN_WUNPCKELSW,
23376 ARM_BUILTIN_WUNPCKELUB,
23377 ARM_BUILTIN_WUNPCKELUH,
23378 ARM_BUILTIN_WUNPCKELUW,
23380 ARM_BUILTIN_WABSB,
23381 ARM_BUILTIN_WABSH,
23382 ARM_BUILTIN_WABSW,
23384 ARM_BUILTIN_WADDSUBHX,
23385 ARM_BUILTIN_WSUBADDHX,
23387 ARM_BUILTIN_WABSDIFFB,
23388 ARM_BUILTIN_WABSDIFFH,
23389 ARM_BUILTIN_WABSDIFFW,
23391 ARM_BUILTIN_WADDCH,
23392 ARM_BUILTIN_WADDCW,
23394 ARM_BUILTIN_WAVG4,
23395 ARM_BUILTIN_WAVG4R,
23397 ARM_BUILTIN_WMADDSX,
23398 ARM_BUILTIN_WMADDUX,
23400 ARM_BUILTIN_WMADDSN,
23401 ARM_BUILTIN_WMADDUN,
23403 ARM_BUILTIN_WMULWSM,
23404 ARM_BUILTIN_WMULWUM,
23406 ARM_BUILTIN_WMULWSMR,
23407 ARM_BUILTIN_WMULWUMR,
23409 ARM_BUILTIN_WMULWL,
23411 ARM_BUILTIN_WMULSMR,
23412 ARM_BUILTIN_WMULUMR,
23414 ARM_BUILTIN_WQMULM,
23415 ARM_BUILTIN_WQMULMR,
23417 ARM_BUILTIN_WQMULWM,
23418 ARM_BUILTIN_WQMULWMR,
23420 ARM_BUILTIN_WADDBHUSM,
23421 ARM_BUILTIN_WADDBHUSL,
23423 ARM_BUILTIN_WQMIABB,
23424 ARM_BUILTIN_WQMIABT,
23425 ARM_BUILTIN_WQMIATB,
23426 ARM_BUILTIN_WQMIATT,
23428 ARM_BUILTIN_WQMIABBN,
23429 ARM_BUILTIN_WQMIABTN,
23430 ARM_BUILTIN_WQMIATBN,
23431 ARM_BUILTIN_WQMIATTN,
23433 ARM_BUILTIN_WMIABB,
23434 ARM_BUILTIN_WMIABT,
23435 ARM_BUILTIN_WMIATB,
23436 ARM_BUILTIN_WMIATT,
23438 ARM_BUILTIN_WMIABBN,
23439 ARM_BUILTIN_WMIABTN,
23440 ARM_BUILTIN_WMIATBN,
23441 ARM_BUILTIN_WMIATTN,
23443 ARM_BUILTIN_WMIAWBB,
23444 ARM_BUILTIN_WMIAWBT,
23445 ARM_BUILTIN_WMIAWTB,
23446 ARM_BUILTIN_WMIAWTT,
23448 ARM_BUILTIN_WMIAWBBN,
23449 ARM_BUILTIN_WMIAWBTN,
23450 ARM_BUILTIN_WMIAWTBN,
23451 ARM_BUILTIN_WMIAWTTN,
23453 ARM_BUILTIN_WMERGE,
23455 ARM_BUILTIN_CRC32B,
23456 ARM_BUILTIN_CRC32H,
23457 ARM_BUILTIN_CRC32W,
23458 ARM_BUILTIN_CRC32CB,
23459 ARM_BUILTIN_CRC32CH,
23460 ARM_BUILTIN_CRC32CW,
23462 #undef CRYPTO1
23463 #undef CRYPTO2
23464 #undef CRYPTO3
23466 #define CRYPTO1(L, U, M1, M2) \
23467 ARM_BUILTIN_CRYPTO_##U,
23468 #define CRYPTO2(L, U, M1, M2, M3) \
23469 ARM_BUILTIN_CRYPTO_##U,
23470 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23471 ARM_BUILTIN_CRYPTO_##U,
23473 #include "crypto.def"
23475 #undef CRYPTO1
23476 #undef CRYPTO2
23477 #undef CRYPTO3
23479 #include "arm_neon_builtins.def"
23481 ,ARM_BUILTIN_MAX
23484 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23486 #undef CF
23487 #undef VAR1
23488 #undef VAR2
23489 #undef VAR3
23490 #undef VAR4
23491 #undef VAR5
23492 #undef VAR6
23493 #undef VAR7
23494 #undef VAR8
23495 #undef VAR9
23496 #undef VAR10
23498 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23500 #define NUM_DREG_TYPES 5
23501 #define NUM_QREG_TYPES 6
23503 static void
23504 arm_init_neon_builtins (void)
23506 unsigned int i, fcode;
23507 tree decl;
23509 tree neon_intQI_type_node;
23510 tree neon_intHI_type_node;
23511 tree neon_floatHF_type_node;
23512 tree neon_polyQI_type_node;
23513 tree neon_polyHI_type_node;
23514 tree neon_intSI_type_node;
23515 tree neon_intDI_type_node;
23516 tree neon_intUTI_type_node;
23517 tree neon_float_type_node;
23519 tree intQI_pointer_node;
23520 tree intHI_pointer_node;
23521 tree intSI_pointer_node;
23522 tree intDI_pointer_node;
23523 tree float_pointer_node;
23525 tree const_intQI_node;
23526 tree const_intHI_node;
23527 tree const_intSI_node;
23528 tree const_intDI_node;
23529 tree const_float_node;
23531 tree const_intQI_pointer_node;
23532 tree const_intHI_pointer_node;
23533 tree const_intSI_pointer_node;
23534 tree const_intDI_pointer_node;
23535 tree const_float_pointer_node;
23537 tree V8QI_type_node;
23538 tree V4HI_type_node;
23539 tree V4HF_type_node;
23540 tree V2SI_type_node;
23541 tree V2SF_type_node;
23542 tree V16QI_type_node;
23543 tree V8HI_type_node;
23544 tree V4SI_type_node;
23545 tree V4SF_type_node;
23546 tree V2DI_type_node;
23548 tree intUQI_type_node;
23549 tree intUHI_type_node;
23550 tree intUSI_type_node;
23551 tree intUDI_type_node;
23553 tree intEI_type_node;
23554 tree intOI_type_node;
23555 tree intCI_type_node;
23556 tree intXI_type_node;
23558 tree V8QI_pointer_node;
23559 tree V4HI_pointer_node;
23560 tree V2SI_pointer_node;
23561 tree V2SF_pointer_node;
23562 tree V16QI_pointer_node;
23563 tree V8HI_pointer_node;
23564 tree V4SI_pointer_node;
23565 tree V4SF_pointer_node;
23566 tree V2DI_pointer_node;
23568 tree void_ftype_pv8qi_v8qi_v8qi;
23569 tree void_ftype_pv4hi_v4hi_v4hi;
23570 tree void_ftype_pv2si_v2si_v2si;
23571 tree void_ftype_pv2sf_v2sf_v2sf;
23572 tree void_ftype_pdi_di_di;
23573 tree void_ftype_pv16qi_v16qi_v16qi;
23574 tree void_ftype_pv8hi_v8hi_v8hi;
23575 tree void_ftype_pv4si_v4si_v4si;
23576 tree void_ftype_pv4sf_v4sf_v4sf;
23577 tree void_ftype_pv2di_v2di_v2di;
23579 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23580 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23581 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23583 /* Create distinguished type nodes for NEON vector element types,
23584 and pointers to values of such types, so we can detect them later. */
23585 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23586 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23587 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23588 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23589 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23590 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23591 neon_float_type_node = make_node (REAL_TYPE);
23592 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23593 layout_type (neon_float_type_node);
23594 neon_floatHF_type_node = make_node (REAL_TYPE);
23595 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23596 layout_type (neon_floatHF_type_node);
23598 /* Define typedefs which exactly correspond to the modes we are basing vector
23599 types on. If you change these names you'll need to change
23600 the table used by arm_mangle_type too. */
23601 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23602 "__builtin_neon_qi");
23603 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23604 "__builtin_neon_hi");
23605 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23606 "__builtin_neon_hf");
23607 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23608 "__builtin_neon_si");
23609 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23610 "__builtin_neon_sf");
23611 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23612 "__builtin_neon_di");
23613 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23614 "__builtin_neon_poly8");
23615 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23616 "__builtin_neon_poly16");
23618 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23619 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23620 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23621 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23622 float_pointer_node = build_pointer_type (neon_float_type_node);
23624 /* Next create constant-qualified versions of the above types. */
23625 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23626 TYPE_QUAL_CONST);
23627 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23628 TYPE_QUAL_CONST);
23629 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23630 TYPE_QUAL_CONST);
23631 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23632 TYPE_QUAL_CONST);
23633 const_float_node = build_qualified_type (neon_float_type_node,
23634 TYPE_QUAL_CONST);
23636 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23637 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23638 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23639 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23640 const_float_pointer_node = build_pointer_type (const_float_node);
23642 /* Now create vector types based on our NEON element types. */
23643 /* 64-bit vectors. */
23644 V8QI_type_node =
23645 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23646 V4HI_type_node =
23647 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23648 V4HF_type_node =
23649 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23650 V2SI_type_node =
23651 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23652 V2SF_type_node =
23653 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23654 /* 128-bit vectors. */
23655 V16QI_type_node =
23656 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23657 V8HI_type_node =
23658 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23659 V4SI_type_node =
23660 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23661 V4SF_type_node =
23662 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23663 V2DI_type_node =
23664 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23666 /* Unsigned integer types for various mode sizes. */
23667 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23668 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23669 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23670 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23671 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23674 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23675 "__builtin_neon_uqi");
23676 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23677 "__builtin_neon_uhi");
23678 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23679 "__builtin_neon_usi");
23680 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23681 "__builtin_neon_udi");
23682 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23683 "__builtin_neon_poly64");
23684 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23685 "__builtin_neon_poly128");
23687 /* Opaque integer types for structures of vectors. */
23688 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23689 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23690 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23691 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23693 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23694 "__builtin_neon_ti");
23695 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23696 "__builtin_neon_ei");
23697 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23698 "__builtin_neon_oi");
23699 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23700 "__builtin_neon_ci");
23701 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23702 "__builtin_neon_xi");
23704 /* Pointers to vector types. */
23705 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23706 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23707 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23708 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23709 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23710 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23711 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23712 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23713 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23715 /* Operations which return results as pairs. */
23716 void_ftype_pv8qi_v8qi_v8qi =
23717 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23718 V8QI_type_node, NULL);
23719 void_ftype_pv4hi_v4hi_v4hi =
23720 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23721 V4HI_type_node, NULL);
23722 void_ftype_pv2si_v2si_v2si =
23723 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23724 V2SI_type_node, NULL);
23725 void_ftype_pv2sf_v2sf_v2sf =
23726 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23727 V2SF_type_node, NULL);
23728 void_ftype_pdi_di_di =
23729 build_function_type_list (void_type_node, intDI_pointer_node,
23730 neon_intDI_type_node, neon_intDI_type_node, NULL);
23731 void_ftype_pv16qi_v16qi_v16qi =
23732 build_function_type_list (void_type_node, V16QI_pointer_node,
23733 V16QI_type_node, V16QI_type_node, NULL);
23734 void_ftype_pv8hi_v8hi_v8hi =
23735 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23736 V8HI_type_node, NULL);
23737 void_ftype_pv4si_v4si_v4si =
23738 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23739 V4SI_type_node, NULL);
23740 void_ftype_pv4sf_v4sf_v4sf =
23741 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23742 V4SF_type_node, NULL);
23743 void_ftype_pv2di_v2di_v2di =
23744 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23745 V2DI_type_node, NULL);
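/* Sketch of how a NEON_RESULTPAIR builtin is meant to be called (the
   builtin name below is only an assumed example): both result vectors are
   stored through the pointer passed as the first argument, matching the
   void (T *, T, T) signatures built above:

     int8x8_t res[2];
     __builtin_neon_vtrnv8qi (res, a, b);
*/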
23747 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23749 tree V4USI_type_node =
23750 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23752 tree V16UQI_type_node =
23753 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23755 tree v16uqi_ftype_v16uqi
23756 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23758 tree v16uqi_ftype_v16uqi_v16uqi
23759 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23760 V16UQI_type_node, NULL_TREE);
23762 tree v4usi_ftype_v4usi
23763 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23765 tree v4usi_ftype_v4usi_v4usi
23766 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23767 V4USI_type_node, NULL_TREE);
23769 tree v4usi_ftype_v4usi_v4usi_v4usi
23770 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23771 V4USI_type_node, V4USI_type_node, NULL_TREE);
23773 tree uti_ftype_udi_udi
23774 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23775 intUDI_type_node, NULL_TREE);
23777 #undef CRYPTO1
23778 #undef CRYPTO2
23779 #undef CRYPTO3
23780 #undef C
23781 #undef N
23782 #undef CF
23783 #undef FT1
23784 #undef FT2
23785 #undef FT3
23787 #define C(U) \
23788 ARM_BUILTIN_CRYPTO_##U
23789 #define N(L) \
23790 "__builtin_arm_crypto_"#L
23791 #define FT1(R, A) \
23792 R##_ftype_##A
23793 #define FT2(R, A1, A2) \
23794 R##_ftype_##A1##_##A2
23795 #define FT3(R, A1, A2, A3) \
23796 R##_ftype_##A1##_##A2##_##A3
23797 #define CRYPTO1(L, U, R, A) \
23798 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23799 C (U), BUILT_IN_MD, \
23800 NULL, NULL_TREE);
23801 #define CRYPTO2(L, U, R, A1, A2) \
23802 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23803 C (U), BUILT_IN_MD, \
23804 NULL, NULL_TREE);
23806 #define CRYPTO3(L, U, R, A1, A2, A3) \
23807 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23808 C (U), BUILT_IN_MD, \
23809 NULL, NULL_TREE);
23810 #include "crypto.def"
23812 #undef CRYPTO1
23813 #undef CRYPTO2
23814 #undef CRYPTO3
23815 #undef C
23816 #undef N
23817 #undef FT1
23818 #undef FT2
23819 #undef FT3
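/* Worked example of the macro scheme above.  Assuming crypto.def contains an
   entry of the shape

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   the CRYPTO2/N/FT2/C definitions expand it to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);  */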
23821 dreg_types[0] = V8QI_type_node;
23822 dreg_types[1] = V4HI_type_node;
23823 dreg_types[2] = V2SI_type_node;
23824 dreg_types[3] = V2SF_type_node;
23825 dreg_types[4] = neon_intDI_type_node;
23827 qreg_types[0] = V16QI_type_node;
23828 qreg_types[1] = V8HI_type_node;
23829 qreg_types[2] = V4SI_type_node;
23830 qreg_types[3] = V4SF_type_node;
23831 qreg_types[4] = V2DI_type_node;
23832 qreg_types[5] = neon_intUTI_type_node;
23834 for (i = 0; i < NUM_QREG_TYPES; i++)
23836 int j;
23837 for (j = 0; j < NUM_QREG_TYPES; j++)
23839 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23840 reinterp_ftype_dreg[i][j]
23841 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23843 reinterp_ftype_qreg[i][j]
23844 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23848 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23849 i < ARRAY_SIZE (neon_builtin_data);
23850 i++, fcode++)
23852 neon_builtin_datum *d = &neon_builtin_data[i];
23854 const char* const modenames[] = {
23855 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23856 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23857 "ti", "ei", "oi"
23859 char namebuf[60];
23860 tree ftype = NULL;
23861 int is_load = 0, is_store = 0;
23863 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23865 d->fcode = fcode;
23867 switch (d->itype)
23869 case NEON_LOAD1:
23870 case NEON_LOAD1LANE:
23871 case NEON_LOADSTRUCT:
23872 case NEON_LOADSTRUCTLANE:
23873 is_load = 1;
23874 /* Fall through. */
23875 case NEON_STORE1:
23876 case NEON_STORE1LANE:
23877 case NEON_STORESTRUCT:
23878 case NEON_STORESTRUCTLANE:
23879 if (!is_load)
23880 is_store = 1;
23881 /* Fall through. */
23882 case NEON_UNOP:
23883 case NEON_RINT:
23884 case NEON_BINOP:
23885 case NEON_LOGICBINOP:
23886 case NEON_SHIFTINSERT:
23887 case NEON_TERNOP:
23888 case NEON_GETLANE:
23889 case NEON_SETLANE:
23890 case NEON_CREATE:
23891 case NEON_DUP:
23892 case NEON_DUPLANE:
23893 case NEON_SHIFTIMM:
23894 case NEON_SHIFTACC:
23895 case NEON_COMBINE:
23896 case NEON_SPLIT:
23897 case NEON_CONVERT:
23898 case NEON_FIXCONV:
23899 case NEON_LANEMUL:
23900 case NEON_LANEMULL:
23901 case NEON_LANEMULH:
23902 case NEON_LANEMAC:
23903 case NEON_SCALARMUL:
23904 case NEON_SCALARMULL:
23905 case NEON_SCALARMULH:
23906 case NEON_SCALARMAC:
23907 case NEON_SELECT:
23908 case NEON_VTBL:
23909 case NEON_VTBX:
23911 int k;
23912 tree return_type = void_type_node, args = void_list_node;
23914 /* Build a function type directly from the insn_data for
23915 this builtin. The build_function_type() function takes
23916 care of removing duplicates for us. */
23917 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23919 tree eltype;
23921 if (is_load && k == 1)
23923 /* Neon load patterns always have the memory
23924 operand in the operand 1 position. */
23925 gcc_assert (insn_data[d->code].operand[k].predicate
23926 == neon_struct_operand);
23928 switch (d->mode)
23930 case T_V8QI:
23931 case T_V16QI:
23932 eltype = const_intQI_pointer_node;
23933 break;
23935 case T_V4HI:
23936 case T_V8HI:
23937 eltype = const_intHI_pointer_node;
23938 break;
23940 case T_V2SI:
23941 case T_V4SI:
23942 eltype = const_intSI_pointer_node;
23943 break;
23945 case T_V2SF:
23946 case T_V4SF:
23947 eltype = const_float_pointer_node;
23948 break;
23950 case T_DI:
23951 case T_V2DI:
23952 eltype = const_intDI_pointer_node;
23953 break;
23955 default: gcc_unreachable ();
23958 else if (is_store && k == 0)
23960 /* Similarly, Neon store patterns use operand 0 as
23961 the memory location to store to. */
23962 gcc_assert (insn_data[d->code].operand[k].predicate
23963 == neon_struct_operand);
23965 switch (d->mode)
23967 case T_V8QI:
23968 case T_V16QI:
23969 eltype = intQI_pointer_node;
23970 break;
23972 case T_V4HI:
23973 case T_V8HI:
23974 eltype = intHI_pointer_node;
23975 break;
23977 case T_V2SI:
23978 case T_V4SI:
23979 eltype = intSI_pointer_node;
23980 break;
23982 case T_V2SF:
23983 case T_V4SF:
23984 eltype = float_pointer_node;
23985 break;
23987 case T_DI:
23988 case T_V2DI:
23989 eltype = intDI_pointer_node;
23990 break;
23992 default: gcc_unreachable ();
23995 else
23997 switch (insn_data[d->code].operand[k].mode)
23999 case VOIDmode: eltype = void_type_node; break;
24000 /* Scalars. */
24001 case QImode: eltype = neon_intQI_type_node; break;
24002 case HImode: eltype = neon_intHI_type_node; break;
24003 case SImode: eltype = neon_intSI_type_node; break;
24004 case SFmode: eltype = neon_float_type_node; break;
24005 case DImode: eltype = neon_intDI_type_node; break;
24006 case TImode: eltype = intTI_type_node; break;
24007 case EImode: eltype = intEI_type_node; break;
24008 case OImode: eltype = intOI_type_node; break;
24009 case CImode: eltype = intCI_type_node; break;
24010 case XImode: eltype = intXI_type_node; break;
24011 /* 64-bit vectors. */
24012 case V8QImode: eltype = V8QI_type_node; break;
24013 case V4HImode: eltype = V4HI_type_node; break;
24014 case V2SImode: eltype = V2SI_type_node; break;
24015 case V2SFmode: eltype = V2SF_type_node; break;
24016 /* 128-bit vectors. */
24017 case V16QImode: eltype = V16QI_type_node; break;
24018 case V8HImode: eltype = V8HI_type_node; break;
24019 case V4SImode: eltype = V4SI_type_node; break;
24020 case V4SFmode: eltype = V4SF_type_node; break;
24021 case V2DImode: eltype = V2DI_type_node; break;
24022 default: gcc_unreachable ();
24026 if (k == 0 && !is_store)
24027 return_type = eltype;
24028 else
24029 args = tree_cons (NULL_TREE, eltype, args);
24032 ftype = build_function_type (return_type, args);
24034 break;
24036 case NEON_RESULTPAIR:
24038 switch (insn_data[d->code].operand[1].mode)
24040 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
24041 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
24042 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
24043 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
24044 case DImode: ftype = void_ftype_pdi_di_di; break;
24045 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
24046 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
24047 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
24048 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
24049 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
24050 default: gcc_unreachable ();
24053 break;
24055 case NEON_REINTERP:
24057 /* We iterate over NUM_DREG_TYPES doubleword types,
24058 then NUM_QREG_TYPES quadword types.
24059 V4HF is not a type used in reinterpret, so we translate
24060 d->mode to the correct index in reinterp_ftype_dreg. */
24061 bool qreg_p
24062 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24063 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24064 % NUM_QREG_TYPES;
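/* Worked example of the index translation above, using the mode order from
   modenames[] (T_V8QI = 0, T_V4HI = 1, T_V4HF = 2, T_V2SI = 3, ...,
   T_V16QI = 6, ..., T_TI = 11): a doubleword reinterpret with
   d->mode == T_V2SI skips the unused V4HF slot, giving rhs == 2 and hence
   dreg_types[2] (V2SI); a quadword reinterpret with d->mode == T_TI gives
   11 % NUM_QREG_TYPES == 5, i.e. qreg_types[5].  */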
24065 switch (insn_data[d->code].operand[0].mode)
24067 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24068 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24069 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24070 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24071 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24072 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24073 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24074 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24075 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24076 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24077 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24078 default: gcc_unreachable ();
24081 break;
24082 case NEON_FLOAT_WIDEN:
24084 tree eltype = NULL_TREE;
24085 tree return_type = NULL_TREE;
24087 switch (insn_data[d->code].operand[1].mode)
24089 case V4HFmode:
24090 eltype = V4HF_type_node;
24091 return_type = V4SF_type_node;
24092 break;
24093 default: gcc_unreachable ();
24095 ftype = build_function_type_list (return_type, eltype, NULL);
24096 break;
24098 case NEON_FLOAT_NARROW:
24100 tree eltype = NULL_TREE;
24101 tree return_type = NULL_TREE;
24103 switch (insn_data[d->code].operand[1].mode)
24105 case V4SFmode:
24106 eltype = V4SF_type_node;
24107 return_type = V4HF_type_node;
24108 break;
24109 default: gcc_unreachable ();
24111 ftype = build_function_type_list (return_type, eltype, NULL);
24112 break;
24114 default:
24115 gcc_unreachable ();
24118 gcc_assert (ftype != NULL);
24120 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24122 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24123 NULL_TREE);
24124 arm_builtin_decls[fcode] = decl;
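/* Example of the name construction above (assuming arm_neon_builtins.def
   contains an entry named "vadd" with mode T_V8QI): the sprintf yields
   "__builtin_neon_vaddv8qi", which is the name arm_neon.h is expected to
   call for the 64-bit variant of that operation.  */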
24128 #undef NUM_DREG_TYPES
24129 #undef NUM_QREG_TYPES
24131 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24132 do \
24134 if ((MASK) & insn_flags) \
24136 tree bdecl; \
24137 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24138 BUILT_IN_MD, NULL, NULL_TREE); \
24139 arm_builtin_decls[CODE] = bdecl; \
24142 while (0)
24144 struct builtin_description
24146 const unsigned int mask;
24147 const enum insn_code icode;
24148 const char * const name;
24149 const enum arm_builtins code;
24150 const enum rtx_code comparison;
24151 const unsigned int flag;
24154 static const struct builtin_description bdesc_2arg[] =
24156 #define IWMMXT_BUILTIN(code, string, builtin) \
24157 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24158 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24160 #define IWMMXT2_BUILTIN(code, string, builtin) \
24161 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24162 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
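/* For instance, the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to the initializer

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the insn pattern to its builtin code and user-visible name.  */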
24164 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24165 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24166 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24167 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24168 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24169 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24170 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24171 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24172 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24173 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24174 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24175 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24176 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24177 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24178 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24179 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24180 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24181 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24182 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24183 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24184 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24185 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24186 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24187 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24188 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24189 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24190 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24191 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24192 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24193 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24194 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24195 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24196 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24197 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24198 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24199 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24200 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24201 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24202 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24203 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24204 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24205 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24206 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24207 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24208 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24209 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24210 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24211 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24212 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24213 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24214 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24215 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24216 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24217 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24218 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24219 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24220 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24221 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24222 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24223 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24224 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24225 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24226 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24227 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24228 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24229 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24230 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24231 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24232 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24233 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24234 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24235 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24236 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24237 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24238 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24239 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24240 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24241 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24243 #define IWMMXT_BUILTIN2(code, builtin) \
24244 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24246 #define IWMMXT2_BUILTIN2(code, builtin) \
24247 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24249 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24250 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24251 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24252 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24253 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24254 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24255 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24256 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24257 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24258 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24260 #define CRC32_BUILTIN(L, U) \
24261 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24262 UNKNOWN, 0},
24263 CRC32_BUILTIN (crc32b, CRC32B)
24264 CRC32_BUILTIN (crc32h, CRC32H)
24265 CRC32_BUILTIN (crc32w, CRC32W)
24266 CRC32_BUILTIN (crc32cb, CRC32CB)
24267 CRC32_BUILTIN (crc32ch, CRC32CH)
24268 CRC32_BUILTIN (crc32cw, CRC32CW)
24269 #undef CRC32_BUILTIN
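/* As an illustration, CRC32_BUILTIN (crc32b, CRC32B) above expands to

     {0, CODE_FOR_crc32b, "__builtin_arm_crc32b", ARM_BUILTIN_CRC32B,
      UNKNOWN, 0},

   The zero mask means these rows are skipped by the FL_IWMMXT-gated
   registration loop in arm_init_iwmmxt_builtins; the CRC32 builtins are
   declared separately by arm_init_crc32_builtins below.  */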
24272 #define CRYPTO_BUILTIN(L, U) \
24273 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24274 UNKNOWN, 0},
24275 #undef CRYPTO1
24276 #undef CRYPTO2
24277 #undef CRYPTO3
24278 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24279 #define CRYPTO1(L, U, R, A)
24280 #define CRYPTO3(L, U, R, A1, A2, A3)
24281 #include "crypto.def"
24282 #undef CRYPTO1
24283 #undef CRYPTO2
24284 #undef CRYPTO3
24288 static const struct builtin_description bdesc_1arg[] =
24290 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24291 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24292 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24293 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24294 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24295 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24296 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24297 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24298 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24299 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24300 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24301 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24302 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24303 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24304 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24305 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24306 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24307 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24308 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24309 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24310 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24311 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24312 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24313 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24315 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24316 #define CRYPTO2(L, U, R, A1, A2)
24317 #define CRYPTO3(L, U, R, A1, A2, A3)
24318 #include "crypto.def"
24319 #undef CRYPTO1
24320 #undef CRYPTO2
24321 #undef CRYPTO3
24324 static const struct builtin_description bdesc_3arg[] =
24326 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24327 #define CRYPTO1(L, U, R, A)
24328 #define CRYPTO2(L, U, R, A1, A2)
24329 #include "crypto.def"
24330 #undef CRYPTO1
24331 #undef CRYPTO2
24332 #undef CRYPTO3
24334 #undef CRYPTO_BUILTIN
24336 /* Set up all the iWMMXt builtins. This is not called if
24337 TARGET_IWMMXT is zero. */
24339 static void
24340 arm_init_iwmmxt_builtins (void)
24342 const struct builtin_description * d;
24343 size_t i;
24345 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24346 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24347 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24349 tree v8qi_ftype_v8qi_v8qi_int
24350 = build_function_type_list (V8QI_type_node,
24351 V8QI_type_node, V8QI_type_node,
24352 integer_type_node, NULL_TREE);
24353 tree v4hi_ftype_v4hi_int
24354 = build_function_type_list (V4HI_type_node,
24355 V4HI_type_node, integer_type_node, NULL_TREE);
24356 tree v2si_ftype_v2si_int
24357 = build_function_type_list (V2SI_type_node,
24358 V2SI_type_node, integer_type_node, NULL_TREE);
24359 tree v2si_ftype_di_di
24360 = build_function_type_list (V2SI_type_node,
24361 long_long_integer_type_node,
24362 long_long_integer_type_node,
24363 NULL_TREE);
24364 tree di_ftype_di_int
24365 = build_function_type_list (long_long_integer_type_node,
24366 long_long_integer_type_node,
24367 integer_type_node, NULL_TREE);
24368 tree di_ftype_di_int_int
24369 = build_function_type_list (long_long_integer_type_node,
24370 long_long_integer_type_node,
24371 integer_type_node,
24372 integer_type_node, NULL_TREE);
24373 tree int_ftype_v8qi
24374 = build_function_type_list (integer_type_node,
24375 V8QI_type_node, NULL_TREE);
24376 tree int_ftype_v4hi
24377 = build_function_type_list (integer_type_node,
24378 V4HI_type_node, NULL_TREE);
24379 tree int_ftype_v2si
24380 = build_function_type_list (integer_type_node,
24381 V2SI_type_node, NULL_TREE);
24382 tree int_ftype_v8qi_int
24383 = build_function_type_list (integer_type_node,
24384 V8QI_type_node, integer_type_node, NULL_TREE);
24385 tree int_ftype_v4hi_int
24386 = build_function_type_list (integer_type_node,
24387 V4HI_type_node, integer_type_node, NULL_TREE);
24388 tree int_ftype_v2si_int
24389 = build_function_type_list (integer_type_node,
24390 V2SI_type_node, integer_type_node, NULL_TREE);
24391 tree v8qi_ftype_v8qi_int_int
24392 = build_function_type_list (V8QI_type_node,
24393 V8QI_type_node, integer_type_node,
24394 integer_type_node, NULL_TREE);
24395 tree v4hi_ftype_v4hi_int_int
24396 = build_function_type_list (V4HI_type_node,
24397 V4HI_type_node, integer_type_node,
24398 integer_type_node, NULL_TREE);
24399 tree v2si_ftype_v2si_int_int
24400 = build_function_type_list (V2SI_type_node,
24401 V2SI_type_node, integer_type_node,
24402 integer_type_node, NULL_TREE);
24403 /* Miscellaneous. */
24404 tree v8qi_ftype_v4hi_v4hi
24405 = build_function_type_list (V8QI_type_node,
24406 V4HI_type_node, V4HI_type_node, NULL_TREE);
24407 tree v4hi_ftype_v2si_v2si
24408 = build_function_type_list (V4HI_type_node,
24409 V2SI_type_node, V2SI_type_node, NULL_TREE);
24410 tree v8qi_ftype_v4hi_v8qi
24411 = build_function_type_list (V8QI_type_node,
24412 V4HI_type_node, V8QI_type_node, NULL_TREE);
24413 tree v2si_ftype_v4hi_v4hi
24414 = build_function_type_list (V2SI_type_node,
24415 V4HI_type_node, V4HI_type_node, NULL_TREE);
24416 tree v2si_ftype_v8qi_v8qi
24417 = build_function_type_list (V2SI_type_node,
24418 V8QI_type_node, V8QI_type_node, NULL_TREE);
24419 tree v4hi_ftype_v4hi_di
24420 = build_function_type_list (V4HI_type_node,
24421 V4HI_type_node, long_long_integer_type_node,
24422 NULL_TREE);
24423 tree v2si_ftype_v2si_di
24424 = build_function_type_list (V2SI_type_node,
24425 V2SI_type_node, long_long_integer_type_node,
24426 NULL_TREE);
24427 tree di_ftype_void
24428 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24429 tree int_ftype_void
24430 = build_function_type_list (integer_type_node, NULL_TREE);
24431 tree di_ftype_v8qi
24432 = build_function_type_list (long_long_integer_type_node,
24433 V8QI_type_node, NULL_TREE);
24434 tree di_ftype_v4hi
24435 = build_function_type_list (long_long_integer_type_node,
24436 V4HI_type_node, NULL_TREE);
24437 tree di_ftype_v2si
24438 = build_function_type_list (long_long_integer_type_node,
24439 V2SI_type_node, NULL_TREE);
24440 tree v2si_ftype_v4hi
24441 = build_function_type_list (V2SI_type_node,
24442 V4HI_type_node, NULL_TREE);
24443 tree v4hi_ftype_v8qi
24444 = build_function_type_list (V4HI_type_node,
24445 V8QI_type_node, NULL_TREE);
24446 tree v8qi_ftype_v8qi
24447 = build_function_type_list (V8QI_type_node,
24448 V8QI_type_node, NULL_TREE);
24449 tree v4hi_ftype_v4hi
24450 = build_function_type_list (V4HI_type_node,
24451 V4HI_type_node, NULL_TREE);
24452 tree v2si_ftype_v2si
24453 = build_function_type_list (V2SI_type_node,
24454 V2SI_type_node, NULL_TREE);
24456 tree di_ftype_di_v4hi_v4hi
24457 = build_function_type_list (long_long_unsigned_type_node,
24458 long_long_unsigned_type_node,
24459 V4HI_type_node, V4HI_type_node,
24460 NULL_TREE);
24462 tree di_ftype_v4hi_v4hi
24463 = build_function_type_list (long_long_unsigned_type_node,
24464 V4HI_type_node, V4HI_type_node,
24465 NULL_TREE);
24467 tree v2si_ftype_v2si_v4hi_v4hi
24468 = build_function_type_list (V2SI_type_node,
24469 V2SI_type_node, V4HI_type_node,
24470 V4HI_type_node, NULL_TREE);
24472 tree v2si_ftype_v2si_v8qi_v8qi
24473 = build_function_type_list (V2SI_type_node,
24474 V2SI_type_node, V8QI_type_node,
24475 V8QI_type_node, NULL_TREE);
24477 tree di_ftype_di_v2si_v2si
24478 = build_function_type_list (long_long_unsigned_type_node,
24479 long_long_unsigned_type_node,
24480 V2SI_type_node, V2SI_type_node,
24481 NULL_TREE);
24483 tree di_ftype_di_di_int
24484 = build_function_type_list (long_long_unsigned_type_node,
24485 long_long_unsigned_type_node,
24486 long_long_unsigned_type_node,
24487 integer_type_node, NULL_TREE);
24489 tree void_ftype_int
24490 = build_function_type_list (void_type_node,
24491 integer_type_node, NULL_TREE);
24493 tree v8qi_ftype_char
24494 = build_function_type_list (V8QI_type_node,
24495 signed_char_type_node, NULL_TREE);
24497 tree v4hi_ftype_short
24498 = build_function_type_list (V4HI_type_node,
24499 short_integer_type_node, NULL_TREE);
24501 tree v2si_ftype_int
24502 = build_function_type_list (V2SI_type_node,
24503 integer_type_node, NULL_TREE);
24505 /* Normal vector binops. */
24506 tree v8qi_ftype_v8qi_v8qi
24507 = build_function_type_list (V8QI_type_node,
24508 V8QI_type_node, V8QI_type_node, NULL_TREE);
24509 tree v4hi_ftype_v4hi_v4hi
24510 = build_function_type_list (V4HI_type_node,
24511 V4HI_type_node, V4HI_type_node, NULL_TREE);
24512 tree v2si_ftype_v2si_v2si
24513 = build_function_type_list (V2SI_type_node,
24514 V2SI_type_node, V2SI_type_node, NULL_TREE);
24515 tree di_ftype_di_di
24516 = build_function_type_list (long_long_unsigned_type_node,
24517 long_long_unsigned_type_node,
24518 long_long_unsigned_type_node,
24519 NULL_TREE);
24521 /* Add all builtins that are more or less simple operations on two
24522 operands. */
24523 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24525 /* Use one of the operands; the target can have a different mode for
24526 mask-generating compares. */
24527 enum machine_mode mode;
24528 tree type;
24530 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24531 continue;
24533 mode = insn_data[d->icode].operand[1].mode;
24535 switch (mode)
24537 case V8QImode:
24538 type = v8qi_ftype_v8qi_v8qi;
24539 break;
24540 case V4HImode:
24541 type = v4hi_ftype_v4hi_v4hi;
24542 break;
24543 case V2SImode:
24544 type = v2si_ftype_v2si_v2si;
24545 break;
24546 case DImode:
24547 type = di_ftype_di_di;
24548 break;
24550 default:
24551 gcc_unreachable ();
24554 def_mbuiltin (d->mask, d->name, type, d->code);
24557 /* Add the remaining MMX insns with somewhat more complicated types. */
24558 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24560 ARM_BUILTIN_ ## CODE)
24562 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24563 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24564 ARM_BUILTIN_ ## CODE)
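/* For example, the call iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO)
   just below expands to

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   so the builtin is only created when FL_IWMMXT is set in insn_flags.  */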
24566 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24567 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24568 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24569 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24570 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24571 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24572 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24573 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24574 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24576 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24577 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24578 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24579 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24580 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24581 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24583 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24584 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24585 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24586 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24587 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24588 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24590 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24591 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24592 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24593 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24594 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24595 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24597 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24598 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24599 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24600 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24601 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24602 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24604 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24606 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24607 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24608 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24609 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24610 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24611 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24612 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24613 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24614 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24615 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24617 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24618 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24619 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24620 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24621 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24622 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24623 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24624 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24625 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24627 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24628 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24629 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24631 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24632 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24633 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24635 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24636 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24638 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24639 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24640 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24641 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24642 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24643 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24645 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24646 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24647 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24648 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24649 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24650 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24651 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24652 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24653 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24654 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24655 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24656 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24658 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24659 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24660 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24661 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24663 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24664 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24665 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24666 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24667 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24668 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24669 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24671 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24672 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24673 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24675 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24676 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24677 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24678 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24680 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24681 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24682 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24683 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24685 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24686 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24687 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24688 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24690 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24691 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24692 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24693 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24695 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24696 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24697 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24698 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24700 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24701 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24702 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24703 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24705 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24707 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24708 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24709 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24711 #undef iwmmx_mbuiltin
24712 #undef iwmmx2_mbuiltin
24715 static void
24716 arm_init_fp16_builtins (void)
24718 tree fp16_type = make_node (REAL_TYPE);
24719 TYPE_PRECISION (fp16_type) = 16;
24720 layout_type (fp16_type);
24721 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
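/* Illustrative usage (a sketch; it assumes -mfp16-format=ieee or
   -mfp16-format=alternative so that arm_fp16_format is non-zero and this
   function is reached):

     __fp16 h = 1.0f;
     float f = h;

   __fp16 is a storage-only type; arithmetic is performed after promotion
   to float (see arm_promoted_type below).  */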
24724 static void
24725 arm_init_crc32_builtins ()
24727 tree si_ftype_si_qi
24728 = build_function_type_list (unsigned_intSI_type_node,
24729 unsigned_intSI_type_node,
24730 unsigned_intQI_type_node, NULL_TREE);
24731 tree si_ftype_si_hi
24732 = build_function_type_list (unsigned_intSI_type_node,
24733 unsigned_intSI_type_node,
24734 unsigned_intHI_type_node, NULL_TREE);
24735 tree si_ftype_si_si
24736 = build_function_type_list (unsigned_intSI_type_node,
24737 unsigned_intSI_type_node,
24738 unsigned_intSI_type_node, NULL_TREE);
24740 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24741 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24742 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24743 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24744 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24745 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24746 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24747 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24748 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24749 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24750 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24751 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24752 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24753 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24754 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24755 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24756 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24757 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
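/* Illustrative use of the declarations above (an assumed example; it relies
   on a target where TARGET_CRC32 holds, e.g. -march=armv8-a+crc):

     unsigned int c = __builtin_arm_crc32b (crc, byte);

   arm_acle.h is expected to wrap these builtins in the ACLE __crc32*
   intrinsics.  */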
24760 static void
24761 arm_init_builtins (void)
24763 if (TARGET_REALLY_IWMMXT)
24764 arm_init_iwmmxt_builtins ();
24766 if (TARGET_NEON)
24767 arm_init_neon_builtins ();
24769 if (arm_fp16_format)
24770 arm_init_fp16_builtins ();
24772 if (TARGET_CRC32)
24773 arm_init_crc32_builtins ();
24776 /* Return the ARM builtin for CODE. */
24778 static tree
24779 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24781 if (code >= ARM_BUILTIN_MAX)
24782 return error_mark_node;
24784 return arm_builtin_decls[code];
24787 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24789 static const char *
24790 arm_invalid_parameter_type (const_tree t)
24792 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24793 return N_("function parameters cannot have __fp16 type");
24794 return NULL;
24797 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24799 static const char *
24800 arm_invalid_return_type (const_tree t)
24802 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24803 return N_("functions cannot return __fp16 type");
24804 return NULL;
24807 /* Implement TARGET_PROMOTED_TYPE. */
24809 static tree
24810 arm_promoted_type (const_tree t)
24812 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24813 return float_type_node;
24814 return NULL_TREE;
24817 /* Implement TARGET_CONVERT_TO_TYPE.
24818 Specifically, this hook implements the peculiarity of the ARM
24819 half-precision floating-point C semantics that requires conversions
24820 between __fp16 and double to go through an intermediate conversion to float. */
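/* For instance, with this hook in place an assignment such as
   "double d = h;" where h has type __fp16 is expanded as if written
   (double) (float) h, and likewise for the double-to-__fp16 direction.  */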
24822 static tree
24823 arm_convert_to_type (tree type, tree expr)
24825 tree fromtype = TREE_TYPE (expr);
24826 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24827 return NULL_TREE;
24828 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24829 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24830 return convert (type, convert (float_type_node, expr));
24831 return NULL_TREE;
24834 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24835 This simply adds HFmode as a supported mode; even though we don't
24836 implement arithmetic on this type directly, it's supported by
24837 optabs conversions, much the way the double-word arithmetic is
24838 special-cased in the default hook. */
24840 static bool
24841 arm_scalar_mode_supported_p (enum machine_mode mode)
24843 if (mode == HFmode)
24844 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24845 else if (ALL_FIXED_POINT_MODE_P (mode))
24846 return true;
24847 else
24848 return default_scalar_mode_supported_p (mode);
24851 /* Errors in the source file can cause expand_expr to return const0_rtx
24852 where we expect a vector. To avoid crashing, use one of the vector
24853 clear instructions. */
24855 static rtx
24856 safe_vector_operand (rtx x, enum machine_mode mode)
24858 if (x != const0_rtx)
24859 return x;
24860 x = gen_reg_rtx (mode);
24862 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24863 : gen_rtx_SUBREG (DImode, x, 0)));
24864 return x;
24867 /* Function to expand ternary builtins. */
24868 static rtx
24869 arm_expand_ternop_builtin (enum insn_code icode,
24870 tree exp, rtx target)
24872 rtx pat;
24873 tree arg0 = CALL_EXPR_ARG (exp, 0);
24874 tree arg1 = CALL_EXPR_ARG (exp, 1);
24875 tree arg2 = CALL_EXPR_ARG (exp, 2);
24877 rtx op0 = expand_normal (arg0);
24878 rtx op1 = expand_normal (arg1);
24879 rtx op2 = expand_normal (arg2);
24880 rtx op3 = NULL_RTX;
24882 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24883 lane operand depending on endianness. */
24884 bool builtin_sha1cpm_p = false;
24886 if (insn_data[icode].n_operands == 5)
24888 gcc_assert (icode == CODE_FOR_crypto_sha1c
24889 || icode == CODE_FOR_crypto_sha1p
24890 || icode == CODE_FOR_crypto_sha1m);
24891 builtin_sha1cpm_p = true;
24893 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24894 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24895 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24896 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24899 if (VECTOR_MODE_P (mode0))
24900 op0 = safe_vector_operand (op0, mode0);
24901 if (VECTOR_MODE_P (mode1))
24902 op1 = safe_vector_operand (op1, mode1);
24903 if (VECTOR_MODE_P (mode2))
24904 op2 = safe_vector_operand (op2, mode2);
24906 if (! target
24907 || GET_MODE (target) != tmode
24908 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24909 target = gen_reg_rtx (tmode);
24911 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24912 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24913 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24915 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24916 op0 = copy_to_mode_reg (mode0, op0);
24917 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24918 op1 = copy_to_mode_reg (mode1, op1);
24919 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24920 op2 = copy_to_mode_reg (mode2, op2);
24921 if (builtin_sha1cpm_p)
24922 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24924 if (builtin_sha1cpm_p)
24925 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24926 else
24927 pat = GEN_FCN (icode) (target, op0, op1, op2);
24928 if (! pat)
24929 return 0;
24930 emit_insn (pat);
24931 return target;
24934 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24936 static rtx
24937 arm_expand_binop_builtin (enum insn_code icode,
24938 tree exp, rtx target)
24940 rtx pat;
24941 tree arg0 = CALL_EXPR_ARG (exp, 0);
24942 tree arg1 = CALL_EXPR_ARG (exp, 1);
24943 rtx op0 = expand_normal (arg0);
24944 rtx op1 = expand_normal (arg1);
24945 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24946 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24947 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24949 if (VECTOR_MODE_P (mode0))
24950 op0 = safe_vector_operand (op0, mode0);
24951 if (VECTOR_MODE_P (mode1))
24952 op1 = safe_vector_operand (op1, mode1);
24954 if (! target
24955 || GET_MODE (target) != tmode
24956 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24957 target = gen_reg_rtx (tmode);
24959 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24960 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24963 op0 = copy_to_mode_reg (mode0, op0);
24964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24965 op1 = copy_to_mode_reg (mode1, op1);
24967 pat = GEN_FCN (icode) (target, op0, op1);
24968 if (! pat)
24969 return 0;
24970 emit_insn (pat);
24971 return target;
24974 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24976 static rtx
24977 arm_expand_unop_builtin (enum insn_code icode,
24978 tree exp, rtx target, int do_load)
24980 rtx pat;
24981 tree arg0 = CALL_EXPR_ARG (exp, 0);
24982 rtx op0 = expand_normal (arg0);
24983 rtx op1 = NULL_RTX;
24984 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24985 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24986 bool builtin_sha1h_p = false;
24988 if (insn_data[icode].n_operands == 3)
24990 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24991 builtin_sha1h_p = true;
24994 if (! target
24995 || GET_MODE (target) != tmode
24996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24997 target = gen_reg_rtx (tmode);
24998 if (do_load)
24999 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25000 else
25002 if (VECTOR_MODE_P (mode0))
25003 op0 = safe_vector_operand (op0, mode0);
25005 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25006 op0 = copy_to_mode_reg (mode0, op0);
25008 if (builtin_sha1h_p)
25009 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25011 if (builtin_sha1h_p)
25012 pat = GEN_FCN (icode) (target, op0, op1);
25013 else
25014 pat = GEN_FCN (icode) (target, op0);
25015 if (! pat)
25016 return 0;
25017 emit_insn (pat);
25018 return target;
25021 typedef enum {
25022 NEON_ARG_COPY_TO_REG,
25023 NEON_ARG_CONSTANT,
25024 NEON_ARG_MEMORY,
25025 NEON_ARG_STOP
25026 } builtin_arg;
25028 #define NEON_MAX_BUILTIN_ARGS 5
25030 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25031 and return an expression for the accessed memory.
25033 The intrinsic function operates on a block of registers that has
25034 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25035 function references the memory at EXP of type TYPE and in mode
25036 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25037 available. */
25039 static tree
25040 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25041 enum machine_mode reg_mode,
25042 neon_builtin_type_mode type_mode)
25044 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25045 tree elem_type, upper_bound, array_type;
25047 /* Work out the size of the register block in bytes. */
25048 reg_size = GET_MODE_SIZE (reg_mode);
25050 /* Work out the size of each vector in bytes. */
25051 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25052 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25054 /* Work out how many vectors there are. */
25055 gcc_assert (reg_size % vector_size == 0);
25056 nvectors = reg_size / vector_size;
25058 /* Work out the type of each element. */
25059 gcc_assert (POINTER_TYPE_P (type));
25060 elem_type = TREE_TYPE (type);
25062 /* Work out how many elements are being loaded or stored.
25063 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25064 and memory elements; anything else implies a lane load or store. */
25065 if (mem_mode == reg_mode)
25066 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25067 else
25068 nelems = nvectors;
25070 /* Create a type that describes the full access. */
25071 upper_bound = build_int_cst (size_type_node, nelems - 1);
25072 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25074 /* Dereference EXP using that type. */
25075 return fold_build2 (MEM_REF, array_type, exp,
25076 build_int_cst (build_pointer_type (array_type), 0));
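/* Worked example of the sizing logic above: for a full load of a block of
   two V8QI vectors (16-byte reg_mode, mem_mode equal to reg_mode, int8
   element type) we get reg_size = 16, vector_size = 8, nvectors = 2 and
   nelems = 16, so the access is described as an array of 16 int8 elements;
   for a lane access (mem_mode != reg_mode) only nvectors elements are
   touched.  */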
25079 /* Expand a Neon builtin. */
25080 static rtx
25081 arm_expand_neon_args (rtx target, int icode, int have_retval,
25082 neon_builtin_type_mode type_mode,
25083 tree exp, int fcode, ...)
25085 va_list ap;
25086 rtx pat;
25087 tree arg[NEON_MAX_BUILTIN_ARGS];
25088 rtx op[NEON_MAX_BUILTIN_ARGS];
25089 tree arg_type;
25090 tree formals;
25091 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25092 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25093 enum machine_mode other_mode;
25094 int argc = 0;
25095 int opno;
25097 if (have_retval
25098 && (!target
25099 || GET_MODE (target) != tmode
25100 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25101 target = gen_reg_rtx (tmode);
25103 va_start (ap, fcode);
25105 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25107 for (;;)
25109 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25111 if (thisarg == NEON_ARG_STOP)
25112 break;
25113 else
25115 opno = argc + have_retval;
25116 mode[argc] = insn_data[icode].operand[opno].mode;
25117 arg[argc] = CALL_EXPR_ARG (exp, argc);
25118 arg_type = TREE_VALUE (formals);
25119 if (thisarg == NEON_ARG_MEMORY)
25121 other_mode = insn_data[icode].operand[1 - opno].mode;
25122 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25123 mode[argc], other_mode,
25124 type_mode);
25127 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25128 value is returned. */
25129 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25130 (thisarg == NEON_ARG_MEMORY
25131 ? EXPAND_MEMORY : EXPAND_NORMAL));
25133 switch (thisarg)
25135 case NEON_ARG_COPY_TO_REG:
25136 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25137 if (!(*insn_data[icode].operand[opno].predicate)
25138 (op[argc], mode[argc]))
25139 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25140 break;
25142 case NEON_ARG_CONSTANT:
25143 /* FIXME: This error message is somewhat unhelpful. */
25144 if (!(*insn_data[icode].operand[opno].predicate)
25145 (op[argc], mode[argc]))
25146 error ("argument must be a constant");
25147 break;
25149 case NEON_ARG_MEMORY:
25150 /* Check if expand failed. */
25151 if (op[argc] == const0_rtx)
25152 return 0;
25153 gcc_assert (MEM_P (op[argc]));
25154 PUT_MODE (op[argc], mode[argc]);
25155 /* ??? arm_neon.h uses the same built-in functions for signed
25156 and unsigned accesses, casting where necessary. This isn't
25157 alias safe. */
25158 set_mem_alias_set (op[argc], 0);
25159 if (!(*insn_data[icode].operand[opno].predicate)
25160 (op[argc], mode[argc]))
25161 op[argc] = (replace_equiv_address
25162 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25163 break;
25165 case NEON_ARG_STOP:
25166 gcc_unreachable ();
25169 argc++;
25170 formals = TREE_CHAIN (formals);
25174 va_end (ap);
25176 if (have_retval)
25177 switch (argc)
25179 case 1:
25180 pat = GEN_FCN (icode) (target, op[0]);
25181 break;
25183 case 2:
25184 pat = GEN_FCN (icode) (target, op[0], op[1]);
25185 break;
25187 case 3:
25188 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25189 break;
25191 case 4:
25192 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25193 break;
25195 case 5:
25196 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25197 break;
25199 default:
25200 gcc_unreachable ();
25202 else
25203 switch (argc)
25205 case 1:
25206 pat = GEN_FCN (icode) (op[0]);
25207 break;
25209 case 2:
25210 pat = GEN_FCN (icode) (op[0], op[1]);
25211 break;
25213 case 3:
25214 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25215 break;
25217 case 4:
25218 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25219 break;
25221 case 5:
25222 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25223 break;
25225 default:
25226 gcc_unreachable ();
25229 if (!pat)
25230 return 0;
25232 emit_insn (pat);
25234 return target;
25237 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25238 constants defined per-instruction or per instruction-variant. Instead, the
25239 required info is looked up in the table neon_builtin_data. */
25240 static rtx
25241 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25243 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25244 neon_itype itype = d->itype;
25245 enum insn_code icode = d->code;
25246 neon_builtin_type_mode type_mode = d->mode;
25248 switch (itype)
25250 case NEON_UNOP:
25251 case NEON_CONVERT:
25252 case NEON_DUPLANE:
25253 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25254 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25256 case NEON_BINOP:
25257 case NEON_SETLANE:
25258 case NEON_SCALARMUL:
25259 case NEON_SCALARMULL:
25260 case NEON_SCALARMULH:
25261 case NEON_SHIFTINSERT:
25262 case NEON_LOGICBINOP:
25263 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25264 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25265 NEON_ARG_STOP);
25267 case NEON_TERNOP:
25268 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25269 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25270 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25272 case NEON_GETLANE:
25273 case NEON_FIXCONV:
25274 case NEON_SHIFTIMM:
25275 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25276 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25277 NEON_ARG_STOP);
25279 case NEON_CREATE:
25280 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25281 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25283 case NEON_DUP:
25284 case NEON_RINT:
25285 case NEON_SPLIT:
25286 case NEON_FLOAT_WIDEN:
25287 case NEON_FLOAT_NARROW:
25288 case NEON_REINTERP:
25289 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25290 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25292 case NEON_COMBINE:
25293 case NEON_VTBL:
25294 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25295 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25297 case NEON_RESULTPAIR:
25298 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25299 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25300 NEON_ARG_STOP);
25302 case NEON_LANEMUL:
25303 case NEON_LANEMULL:
25304 case NEON_LANEMULH:
25305 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25306 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25307 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25309 case NEON_LANEMAC:
25310 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25311 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25312 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25314 case NEON_SHIFTACC:
25315 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25316 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25317 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25319 case NEON_SCALARMAC:
25320 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25321 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25322 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25324 case NEON_SELECT:
25325 case NEON_VTBX:
25326 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25327 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25328 NEON_ARG_STOP);
25330 case NEON_LOAD1:
25331 case NEON_LOADSTRUCT:
25332 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25333 NEON_ARG_MEMORY, NEON_ARG_STOP);
25335 case NEON_LOAD1LANE:
25336 case NEON_LOADSTRUCTLANE:
25337 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25338 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25339 NEON_ARG_STOP);
25341 case NEON_STORE1:
25342 case NEON_STORESTRUCT:
25343 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25344 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25346 case NEON_STORE1LANE:
25347 case NEON_STORESTRUCTLANE:
25348 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25349 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25350 NEON_ARG_STOP);
25353 gcc_unreachable ();
25356 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25357 void
25358 neon_reinterpret (rtx dest, rtx src)
25360 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25363 /* Emit code to place a Neon pair result in memory locations (with equal
25364 registers). */
25365 void
25366 neon_emit_pair_result_insn (enum machine_mode mode,
25367 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25368 rtx op1, rtx op2)
25370 rtx mem = gen_rtx_MEM (mode, destaddr);
25371 rtx tmp1 = gen_reg_rtx (mode);
25372 rtx tmp2 = gen_reg_rtx (mode);
25374 emit_insn (intfn (tmp1, op1, op2, tmp2));
25376 emit_move_insn (mem, tmp1);
25377 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25378 emit_move_insn (mem, tmp2);
25381 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25382 not to early-clobber SRC registers in the process.
25384 We assume that the operands described by SRC and DEST represent a
25385 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25386 number of components into which the copy has been decomposed. */
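/* For example, copying the pair {d1,d2} into {d0,d1} is emitted
   low-to-high (d0 <- d1, then d1 <- d2), while copying {d0,d1} into
   {d1,d2} is emitted high-to-low (d2 <- d1, then d1 <- d0), so that no
   source register is overwritten before it has been read. */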
25387 void
25388 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25390 unsigned int i;
25392 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25393 || REGNO (operands[0]) < REGNO (operands[1]))
25395 for (i = 0; i < count; i++)
25397 operands[2 * i] = dest[i];
25398 operands[2 * i + 1] = src[i];
25401 else
25403 for (i = 0; i < count; i++)
25405 operands[2 * i] = dest[count - i - 1];
25406 operands[2 * i + 1] = src[count - i - 1];
25411 /* Split operands into moves from op[1] + op[2] into op[0]. */
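/* Three cases are handled: if the source D registers already occupy the
   destination Q register's halves, nothing is moved (only a deleted-insn
   note is emitted); if they occupy the halves in reverse order, a single
   two-set PARALLEL is emitted so that it can match a VSWP pattern;
   otherwise the two half moves are ordered so that neither move clobbers
   a source that is still needed. */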
25413 void
25414 neon_split_vcombine (rtx operands[3])
25416 unsigned int dest = REGNO (operands[0]);
25417 unsigned int src1 = REGNO (operands[1]);
25418 unsigned int src2 = REGNO (operands[2]);
25419 enum machine_mode halfmode = GET_MODE (operands[1]);
25420 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25421 rtx destlo, desthi;
25423 if (src1 == dest && src2 == dest + halfregs)
25425 /* No-op move. Can't split to nothing; emit something. */
25426 emit_note (NOTE_INSN_DELETED);
25427 return;
25430 /* Preserve register attributes for variable tracking. */
25431 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25432 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25433 GET_MODE_SIZE (halfmode));
25435 /* Special case of reversed high/low parts. Use VSWP. */
25436 if (src2 == dest && src1 == dest + halfregs)
25438 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25439 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25440 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25441 return;
25444 if (!reg_overlap_mentioned_p (operands[2], destlo))
25446 /* Try to avoid unnecessary moves if part of the result
25447 is in the right place already. */
25448 if (src1 != dest)
25449 emit_move_insn (destlo, operands[1]);
25450 if (src2 != dest + halfregs)
25451 emit_move_insn (desthi, operands[2]);
25453 else
25455 if (src2 != dest + halfregs)
25456 emit_move_insn (desthi, operands[2]);
25457 if (src1 != dest)
25458 emit_move_insn (destlo, operands[1]);
25462 /* Expand an expression EXP that calls a built-in function,
25463 with result going to TARGET if that's convenient
25464 (and in mode MODE if that's convenient).
25465 SUBTARGET may be used as the target for computing one of EXP's operands.
25466 IGNORE is nonzero if the value is to be ignored. */
25468 static rtx
25469 arm_expand_builtin (tree exp,
25470 rtx target,
25471 rtx subtarget ATTRIBUTE_UNUSED,
25472 enum machine_mode mode ATTRIBUTE_UNUSED,
25473 int ignore ATTRIBUTE_UNUSED)
25475 const struct builtin_description * d;
25476 enum insn_code icode;
25477 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25478 tree arg0;
25479 tree arg1;
25480 tree arg2;
25481 rtx op0;
25482 rtx op1;
25483 rtx op2;
25484 rtx pat;
25485 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25486 size_t i;
25487 enum machine_mode tmode;
25488 enum machine_mode mode0;
25489 enum machine_mode mode1;
25490 enum machine_mode mode2;
25491 int opint;
25492 int selector;
25493 int mask;
25494 int imm;
25496 if (fcode >= ARM_BUILTIN_NEON_BASE)
25497 return arm_expand_neon_builtin (fcode, exp, target);
25499 switch (fcode)
25501 case ARM_BUILTIN_TEXTRMSB:
25502 case ARM_BUILTIN_TEXTRMUB:
25503 case ARM_BUILTIN_TEXTRMSH:
25504 case ARM_BUILTIN_TEXTRMUH:
25505 case ARM_BUILTIN_TEXTRMSW:
25506 case ARM_BUILTIN_TEXTRMUW:
25507 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25508 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25509 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25510 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25511 : CODE_FOR_iwmmxt_textrmw);
25513 arg0 = CALL_EXPR_ARG (exp, 0);
25514 arg1 = CALL_EXPR_ARG (exp, 1);
25515 op0 = expand_normal (arg0);
25516 op1 = expand_normal (arg1);
25517 tmode = insn_data[icode].operand[0].mode;
25518 mode0 = insn_data[icode].operand[1].mode;
25519 mode1 = insn_data[icode].operand[2].mode;
25521 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25522 op0 = copy_to_mode_reg (mode0, op0);
25523 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25525 /* @@@ better error message */
25526 error ("selector must be an immediate");
25527 return gen_reg_rtx (tmode);
25530 opint = INTVAL (op1);
25531 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25533 if (opint > 7 || opint < 0)
25534 error ("the range of selector should be in 0 to 7");
25536 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25538 if (opint > 3 || opint < 0)
25539 error ("the range of selector should be in 0 to 3");
25541 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25543 if (opint > 1 || opint < 0)
25544 error ("the range of selector should be in 0 to 1");
25547 if (target == 0
25548 || GET_MODE (target) != tmode
25549 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25550 target = gen_reg_rtx (tmode);
25551 pat = GEN_FCN (icode) (target, op0, op1);
25552 if (! pat)
25553 return 0;
25554 emit_insn (pat);
25555 return target;
25557 case ARM_BUILTIN_WALIGNI:
25558 /* If op2 is an immediate, call waligni, else call walignr. */
25559 arg0 = CALL_EXPR_ARG (exp, 0);
25560 arg1 = CALL_EXPR_ARG (exp, 1);
25561 arg2 = CALL_EXPR_ARG (exp, 2);
25562 op0 = expand_normal (arg0);
25563 op1 = expand_normal (arg1);
25564 op2 = expand_normal (arg2);
25565 if (CONST_INT_P (op2))
25567 icode = CODE_FOR_iwmmxt_waligni;
25568 tmode = insn_data[icode].operand[0].mode;
25569 mode0 = insn_data[icode].operand[1].mode;
25570 mode1 = insn_data[icode].operand[2].mode;
25571 mode2 = insn_data[icode].operand[3].mode;
25572 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25573 op0 = copy_to_mode_reg (mode0, op0);
25574 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25575 op1 = copy_to_mode_reg (mode1, op1);
25576 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25577 selector = INTVAL (op2);
25578 if (selector > 7 || selector < 0)
25579 error ("the range of selector should be in 0 to 7");
25581 else
25583 icode = CODE_FOR_iwmmxt_walignr;
25584 tmode = insn_data[icode].operand[0].mode;
25585 mode0 = insn_data[icode].operand[1].mode;
25586 mode1 = insn_data[icode].operand[2].mode;
25587 mode2 = insn_data[icode].operand[3].mode;
25588 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25589 op0 = copy_to_mode_reg (mode0, op0);
25590 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25591 op1 = copy_to_mode_reg (mode1, op1);
25592 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25593 op2 = copy_to_mode_reg (mode2, op2);
25595 if (target == 0
25596 || GET_MODE (target) != tmode
25597 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25598 target = gen_reg_rtx (tmode);
25599 pat = GEN_FCN (icode) (target, op0, op1, op2);
25600 if (!pat)
25601 return 0;
25602 emit_insn (pat);
25603 return target;
25605 case ARM_BUILTIN_TINSRB:
25606 case ARM_BUILTIN_TINSRH:
25607 case ARM_BUILTIN_TINSRW:
25608 case ARM_BUILTIN_WMERGE:
25609 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25610 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25611 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25612 : CODE_FOR_iwmmxt_tinsrw);
25613 arg0 = CALL_EXPR_ARG (exp, 0);
25614 arg1 = CALL_EXPR_ARG (exp, 1);
25615 arg2 = CALL_EXPR_ARG (exp, 2);
25616 op0 = expand_normal (arg0);
25617 op1 = expand_normal (arg1);
25618 op2 = expand_normal (arg2);
25619 tmode = insn_data[icode].operand[0].mode;
25620 mode0 = insn_data[icode].operand[1].mode;
25621 mode1 = insn_data[icode].operand[2].mode;
25622 mode2 = insn_data[icode].operand[3].mode;
25624 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25625 op0 = copy_to_mode_reg (mode0, op0);
25626 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25627 op1 = copy_to_mode_reg (mode1, op1);
25628 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25630 error ("selector must be an immediate");
25631 return const0_rtx;
25633 if (icode == CODE_FOR_iwmmxt_wmerge)
25635 selector = INTVAL (op2);
25636 if (selector > 7 || selector < 0)
25637 error ("the range of selector should be in 0 to 7");
25639 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25640 || (icode == CODE_FOR_iwmmxt_tinsrh)
25641 || (icode == CODE_FOR_iwmmxt_tinsrw))
25643 mask = 0x01;
25644 selector = INTVAL (op2);
25645 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25646 error ("the range of selector should be in 0 to 7");
25647 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25648 error ("the range of selector should be in 0 to 3");
25649 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25650 error ("the range of selector should be in 0 to 1");
25651 mask <<= selector;
25652 op2 = GEN_INT (mask);
25654 if (target == 0
25655 || GET_MODE (target) != tmode
25656 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25657 target = gen_reg_rtx (tmode);
25658 pat = GEN_FCN (icode) (target, op0, op1, op2);
25659 if (! pat)
25660 return 0;
25661 emit_insn (pat);
25662 return target;
25664 case ARM_BUILTIN_SETWCGR0:
25665 case ARM_BUILTIN_SETWCGR1:
25666 case ARM_BUILTIN_SETWCGR2:
25667 case ARM_BUILTIN_SETWCGR3:
25668 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25669 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25670 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25671 : CODE_FOR_iwmmxt_setwcgr3);
25672 arg0 = CALL_EXPR_ARG (exp, 0);
25673 op0 = expand_normal (arg0);
25674 mode0 = insn_data[icode].operand[0].mode;
25675 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25676 op0 = copy_to_mode_reg (mode0, op0);
25677 pat = GEN_FCN (icode) (op0);
25678 if (!pat)
25679 return 0;
25680 emit_insn (pat);
25681 return 0;
25683 case ARM_BUILTIN_GETWCGR0:
25684 case ARM_BUILTIN_GETWCGR1:
25685 case ARM_BUILTIN_GETWCGR2:
25686 case ARM_BUILTIN_GETWCGR3:
25687 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25688 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25689 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25690 : CODE_FOR_iwmmxt_getwcgr3);
25691 tmode = insn_data[icode].operand[0].mode;
25692 if (target == 0
25693 || GET_MODE (target) != tmode
25694 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25695 target = gen_reg_rtx (tmode);
25696 pat = GEN_FCN (icode) (target);
25697 if (!pat)
25698 return 0;
25699 emit_insn (pat);
25700 return target;
25702 case ARM_BUILTIN_WSHUFH:
25703 icode = CODE_FOR_iwmmxt_wshufh;
25704 arg0 = CALL_EXPR_ARG (exp, 0);
25705 arg1 = CALL_EXPR_ARG (exp, 1);
25706 op0 = expand_normal (arg0);
25707 op1 = expand_normal (arg1);
25708 tmode = insn_data[icode].operand[0].mode;
25709 mode1 = insn_data[icode].operand[1].mode;
25710 mode2 = insn_data[icode].operand[2].mode;
25712 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25713 op0 = copy_to_mode_reg (mode1, op0);
25714 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25716 error ("mask must be an immediate");
25717 return const0_rtx;
25719 selector = INTVAL (op1);
25720 if (selector < 0 || selector > 255)
25721 error ("the range of mask should be in 0 to 255");
25722 if (target == 0
25723 || GET_MODE (target) != tmode
25724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25725 target = gen_reg_rtx (tmode);
25726 pat = GEN_FCN (icode) (target, op0, op1);
25727 if (! pat)
25728 return 0;
25729 emit_insn (pat);
25730 return target;
25732 case ARM_BUILTIN_WMADDS:
25733 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25734 case ARM_BUILTIN_WMADDSX:
25735 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25736 case ARM_BUILTIN_WMADDSN:
25737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25738 case ARM_BUILTIN_WMADDU:
25739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25740 case ARM_BUILTIN_WMADDUX:
25741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25742 case ARM_BUILTIN_WMADDUN:
25743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25744 case ARM_BUILTIN_WSADBZ:
25745 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25746 case ARM_BUILTIN_WSADHZ:
25747 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25749 /* Several three-argument builtins. */
25750 case ARM_BUILTIN_WMACS:
25751 case ARM_BUILTIN_WMACU:
25752 case ARM_BUILTIN_TMIA:
25753 case ARM_BUILTIN_TMIAPH:
25754 case ARM_BUILTIN_TMIATT:
25755 case ARM_BUILTIN_TMIATB:
25756 case ARM_BUILTIN_TMIABT:
25757 case ARM_BUILTIN_TMIABB:
25758 case ARM_BUILTIN_WQMIABB:
25759 case ARM_BUILTIN_WQMIABT:
25760 case ARM_BUILTIN_WQMIATB:
25761 case ARM_BUILTIN_WQMIATT:
25762 case ARM_BUILTIN_WQMIABBN:
25763 case ARM_BUILTIN_WQMIABTN:
25764 case ARM_BUILTIN_WQMIATBN:
25765 case ARM_BUILTIN_WQMIATTN:
25766 case ARM_BUILTIN_WMIABB:
25767 case ARM_BUILTIN_WMIABT:
25768 case ARM_BUILTIN_WMIATB:
25769 case ARM_BUILTIN_WMIATT:
25770 case ARM_BUILTIN_WMIABBN:
25771 case ARM_BUILTIN_WMIABTN:
25772 case ARM_BUILTIN_WMIATBN:
25773 case ARM_BUILTIN_WMIATTN:
25774 case ARM_BUILTIN_WMIAWBB:
25775 case ARM_BUILTIN_WMIAWBT:
25776 case ARM_BUILTIN_WMIAWTB:
25777 case ARM_BUILTIN_WMIAWTT:
25778 case ARM_BUILTIN_WMIAWBBN:
25779 case ARM_BUILTIN_WMIAWBTN:
25780 case ARM_BUILTIN_WMIAWTBN:
25781 case ARM_BUILTIN_WMIAWTTN:
25782 case ARM_BUILTIN_WSADB:
25783 case ARM_BUILTIN_WSADH:
25784 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25785 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25786 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25787 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25788 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25789 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25790 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25791 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25792 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25793 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25794 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25795 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25796 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25797 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25798 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25799 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25800 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25801 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25802 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25803 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25804 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25805 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25806 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25807 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25808 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25809 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25810 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25811 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25812 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25813 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25814 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25815 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25816 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25817 : CODE_FOR_iwmmxt_wsadh);
25818 arg0 = CALL_EXPR_ARG (exp, 0);
25819 arg1 = CALL_EXPR_ARG (exp, 1);
25820 arg2 = CALL_EXPR_ARG (exp, 2);
25821 op0 = expand_normal (arg0);
25822 op1 = expand_normal (arg1);
25823 op2 = expand_normal (arg2);
25824 tmode = insn_data[icode].operand[0].mode;
25825 mode0 = insn_data[icode].operand[1].mode;
25826 mode1 = insn_data[icode].operand[2].mode;
25827 mode2 = insn_data[icode].operand[3].mode;
25829 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25830 op0 = copy_to_mode_reg (mode0, op0);
25831 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25832 op1 = copy_to_mode_reg (mode1, op1);
25833 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25834 op2 = copy_to_mode_reg (mode2, op2);
25835 if (target == 0
25836 || GET_MODE (target) != tmode
25837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25838 target = gen_reg_rtx (tmode);
25839 pat = GEN_FCN (icode) (target, op0, op1, op2);
25840 if (! pat)
25841 return 0;
25842 emit_insn (pat);
25843 return target;
25845 case ARM_BUILTIN_WZERO:
25846 target = gen_reg_rtx (DImode);
25847 emit_insn (gen_iwmmxt_clrdi (target));
25848 return target;
25850 case ARM_BUILTIN_WSRLHI:
25851 case ARM_BUILTIN_WSRLWI:
25852 case ARM_BUILTIN_WSRLDI:
25853 case ARM_BUILTIN_WSLLHI:
25854 case ARM_BUILTIN_WSLLWI:
25855 case ARM_BUILTIN_WSLLDI:
25856 case ARM_BUILTIN_WSRAHI:
25857 case ARM_BUILTIN_WSRAWI:
25858 case ARM_BUILTIN_WSRADI:
25859 case ARM_BUILTIN_WRORHI:
25860 case ARM_BUILTIN_WRORWI:
25861 case ARM_BUILTIN_WRORDI:
25862 case ARM_BUILTIN_WSRLH:
25863 case ARM_BUILTIN_WSRLW:
25864 case ARM_BUILTIN_WSRLD:
25865 case ARM_BUILTIN_WSLLH:
25866 case ARM_BUILTIN_WSLLW:
25867 case ARM_BUILTIN_WSLLD:
25868 case ARM_BUILTIN_WSRAH:
25869 case ARM_BUILTIN_WSRAW:
25870 case ARM_BUILTIN_WSRAD:
25871 case ARM_BUILTIN_WRORH:
25872 case ARM_BUILTIN_WRORW:
25873 case ARM_BUILTIN_WRORD:
25874 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25875 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25876 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25877 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25878 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25879 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25880 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25881 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25882 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25883 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25884 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25885 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25886 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25887 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25888 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25889 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25890 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25891 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25892 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25893 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25894 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25895 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25896 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25897 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25898 : CODE_FOR_nothing);
25899 arg1 = CALL_EXPR_ARG (exp, 1);
25900 op1 = expand_normal (arg1);
25901 if (GET_MODE (op1) == VOIDmode)
25903 imm = INTVAL (op1);
25904 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25905 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25906 && (imm < 0 || imm > 32))
25908 if (fcode == ARM_BUILTIN_WRORHI)
25909 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25910 else if (fcode == ARM_BUILTIN_WRORWI)
25911 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25912 else if (fcode == ARM_BUILTIN_WRORH)
25913 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25914 else
25915 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25917 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25918 && (imm < 0 || imm > 64))
25920 if (fcode == ARM_BUILTIN_WRORDI)
25921 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25922 else
25923 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25925 else if (imm < 0)
25927 if (fcode == ARM_BUILTIN_WSRLHI)
25928 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25929 else if (fcode == ARM_BUILTIN_WSRLWI)
25930 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25931 else if (fcode == ARM_BUILTIN_WSRLDI)
25932 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25933 else if (fcode == ARM_BUILTIN_WSLLHI)
25934 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25935 else if (fcode == ARM_BUILTIN_WSLLWI)
25936 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25937 else if (fcode == ARM_BUILTIN_WSLLDI)
25938 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25939 else if (fcode == ARM_BUILTIN_WSRAHI)
25940 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25941 else if (fcode == ARM_BUILTIN_WSRAWI)
25942 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25943 else if (fcode == ARM_BUILTIN_WSRADI)
25944 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25945 else if (fcode == ARM_BUILTIN_WSRLH)
25946 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25947 else if (fcode == ARM_BUILTIN_WSRLW)
25948 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25949 else if (fcode == ARM_BUILTIN_WSRLD)
25950 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25951 else if (fcode == ARM_BUILTIN_WSLLH)
25952 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25953 else if (fcode == ARM_BUILTIN_WSLLW)
25954 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25955 else if (fcode == ARM_BUILTIN_WSLLD)
25956 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25957 else if (fcode == ARM_BUILTIN_WSRAH)
25958 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25959 else if (fcode == ARM_BUILTIN_WSRAW)
25960 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25961 else
25962 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25965 return arm_expand_binop_builtin (icode, exp, target);
25967 default:
25968 break;
25971 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25972 if (d->code == (const enum arm_builtins) fcode)
25973 return arm_expand_binop_builtin (d->icode, exp, target);
25975 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25976 if (d->code == (const enum arm_builtins) fcode)
25977 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25979 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25980 if (d->code == (const enum arm_builtins) fcode)
25981 return arm_expand_ternop_builtin (d->icode, exp, target);
25983 /* @@@ Should really do something sensible here. */
25984 return NULL_RTX;
25987 /* Return the number (counting from 0) of
25988 the least significant set bit in MASK. */
25990 inline static int
25991 number_of_first_bit_set (unsigned mask)
25993 return ctz_hwi (mask);
25996 /* Like emit_multi_reg_push, but allowing for a different set of
25997 registers to be described as saved. MASK is the set of registers
25998 to be saved; REAL_REGS is the set of registers to be described as
25999 saved. If REAL_REGS is 0, only describe the stack adjustment. */
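/* This is used, for example, when a high register is saved via a low
   work register: MASK then contains the low register that is actually
   pushed while REAL_REGS contains the high register whose value it
   holds, so the unwind information still describes the high register as
   saved. */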
26001 static rtx
26002 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26004 unsigned long regno;
26005 rtx par[10], tmp, reg, insn;
26006 int i, j;
26008 /* Build the parallel of the registers actually being stored. */
26009 for (i = 0; mask; ++i, mask &= mask - 1)
26011 regno = ctz_hwi (mask);
26012 reg = gen_rtx_REG (SImode, regno);
26014 if (i == 0)
26015 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26016 else
26017 tmp = gen_rtx_USE (VOIDmode, reg);
26019 par[i] = tmp;
26022 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26023 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26024 tmp = gen_frame_mem (BLKmode, tmp);
26025 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26026 par[0] = tmp;
26028 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26029 insn = emit_insn (tmp);
26031 /* Always build the stack adjustment note for unwind info. */
26032 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26033 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26034 par[0] = tmp;
26036 /* Build the parallel of the registers recorded as saved for unwind. */
26037 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26039 regno = ctz_hwi (real_regs);
26040 reg = gen_rtx_REG (SImode, regno);
26042 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26043 tmp = gen_frame_mem (SImode, tmp);
26044 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26045 RTX_FRAME_RELATED_P (tmp) = 1;
26046 par[j + 1] = tmp;
26049 if (j == 0)
26050 tmp = par[0];
26051 else
26053 RTX_FRAME_RELATED_P (par[0]) = 1;
26054 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26057 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26059 return insn;
26062 /* Emit code to pop registers from the stack. F is the
26063 assembly file. MASK is the set of registers to pop. */
26064 static void
26065 thumb_pop (FILE *f, unsigned long mask)
26067 int regno;
26068 int lo_mask = mask & 0xFF;
26069 int pushed_words = 0;
26071 gcc_assert (mask);
26073 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26075 /* Special case. Do not generate a POP PC statement here, do it in
26076 thumb_exit(). */
26077 thumb_exit (f, -1);
26078 return;
26081 fprintf (f, "\tpop\t{");
26083 /* Look at the low registers first. */
26084 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26086 if (lo_mask & 1)
26088 asm_fprintf (f, "%r", regno);
26090 if ((lo_mask & ~1) != 0)
26091 fprintf (f, ", ");
26093 pushed_words++;
26097 if (mask & (1 << PC_REGNUM))
26099 /* Catch popping the PC. */
26100 if (TARGET_INTERWORK || TARGET_BACKTRACE
26101 || crtl->calls_eh_return)
26103 /* The PC is never popped directly; instead
26104 it is popped into r3 and then BX is used. */
26105 fprintf (f, "}\n");
26107 thumb_exit (f, -1);
26109 return;
26111 else
26113 if (mask & 0xFF)
26114 fprintf (f, ", ");
26116 asm_fprintf (f, "%r", PC_REGNUM);
26120 fprintf (f, "}\n");
26123 /* Generate code to return from a thumb function.
26124 If 'reg_containing_return_addr' is -1, then the return address is
26125 actually on the stack, at the stack pointer. */
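/* The complexity below is a consequence of Thumb-1 restrictions: POP can
   only target the low registers (plus PC), and a return that needs
   interworking, a backtrace structure or an eh_return adjustment must end
   in BX, so the return address and any saved frame or stack pointer may
   have to be shuffled through the argument registers. */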
26126 static void
26127 thumb_exit (FILE *f, int reg_containing_return_addr)
26129 unsigned regs_available_for_popping;
26130 unsigned regs_to_pop;
26131 int pops_needed;
26132 unsigned available;
26133 unsigned required;
26134 enum machine_mode mode;
26135 int size;
26136 int restore_a4 = FALSE;
26138 /* Compute the registers we need to pop. */
26139 regs_to_pop = 0;
26140 pops_needed = 0;
26142 if (reg_containing_return_addr == -1)
26144 regs_to_pop |= 1 << LR_REGNUM;
26145 ++pops_needed;
26148 if (TARGET_BACKTRACE)
26150 /* Restore the (ARM) frame pointer and stack pointer. */
26151 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26152 pops_needed += 2;
26155 /* If there is nothing to pop then just emit the BX instruction and
26156 return. */
26157 if (pops_needed == 0)
26159 if (crtl->calls_eh_return)
26160 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26162 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26163 return;
26165 /* Otherwise if we are not supporting interworking and we have not created
26166 a backtrace structure and the function was not entered in ARM mode then
26167 just pop the return address straight into the PC. */
26168 else if (!TARGET_INTERWORK
26169 && !TARGET_BACKTRACE
26170 && !is_called_in_ARM_mode (current_function_decl)
26171 && !crtl->calls_eh_return)
26173 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26174 return;
26177 /* Find out how many of the (return) argument registers we can corrupt. */
26178 regs_available_for_popping = 0;
26180 /* If returning via __builtin_eh_return, the bottom three registers
26181 all contain information needed for the return. */
26182 if (crtl->calls_eh_return)
26183 size = 12;
26184 else
26186 /* We can deduce the registers used from the function's
26187 return value. This is more reliable than examining
26188 df_regs_ever_live_p () because that will be set if the register is
26189 ever used in the function, not just if the register is used
26190 to hold a return value. */
26192 if (crtl->return_rtx != 0)
26193 mode = GET_MODE (crtl->return_rtx);
26194 else
26195 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26197 size = GET_MODE_SIZE (mode);
26199 if (size == 0)
26201 /* In a void function we can use any argument register.
26202 In a function that returns a structure on the stack
26203 we can use the second and third argument registers. */
26204 if (mode == VOIDmode)
26205 regs_available_for_popping =
26206 (1 << ARG_REGISTER (1))
26207 | (1 << ARG_REGISTER (2))
26208 | (1 << ARG_REGISTER (3));
26209 else
26210 regs_available_for_popping =
26211 (1 << ARG_REGISTER (2))
26212 | (1 << ARG_REGISTER (3));
26214 else if (size <= 4)
26215 regs_available_for_popping =
26216 (1 << ARG_REGISTER (2))
26217 | (1 << ARG_REGISTER (3));
26218 else if (size <= 8)
26219 regs_available_for_popping =
26220 (1 << ARG_REGISTER (3));
26223 /* Match registers to be popped with registers into which we pop them. */
26224 for (available = regs_available_for_popping,
26225 required = regs_to_pop;
26226 required != 0 && available != 0;
26227 available &= ~(available & - available),
26228 required &= ~(required & - required))
26229 -- pops_needed;
26231 /* If we have any popping registers left over, remove them. */
26232 if (available > 0)
26233 regs_available_for_popping &= ~available;
26235 /* Otherwise if we need another popping register we can use
26236 the fourth argument register. */
26237 else if (pops_needed)
26239 /* If we have not found any free argument registers and
26240 reg a4 contains the return address, we must move it. */
26241 if (regs_available_for_popping == 0
26242 && reg_containing_return_addr == LAST_ARG_REGNUM)
26244 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26245 reg_containing_return_addr = LR_REGNUM;
26247 else if (size > 12)
26249 /* Register a4 is being used to hold part of the return value,
26250 but we have dire need of a free, low register. */
26251 restore_a4 = TRUE;
26253 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26256 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26258 /* The fourth argument register is available. */
26259 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26261 --pops_needed;
26265 /* Pop as many registers as we can. */
26266 thumb_pop (f, regs_available_for_popping);
26268 /* Process the registers we popped. */
26269 if (reg_containing_return_addr == -1)
26271 /* The return address was popped into the lowest numbered register. */
26272 regs_to_pop &= ~(1 << LR_REGNUM);
26274 reg_containing_return_addr =
26275 number_of_first_bit_set (regs_available_for_popping);
26277 /* Remove this register from the mask of available registers, so that
26278 the return address will not be corrupted by further pops. */
26279 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26282 /* If we popped other registers then handle them here. */
26283 if (regs_available_for_popping)
26285 int frame_pointer;
26287 /* Work out which register currently contains the frame pointer. */
26288 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26290 /* Move it into the correct place. */
26291 asm_fprintf (f, "\tmov\t%r, %r\n",
26292 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26294 /* (Temporarily) remove it from the mask of popped registers. */
26295 regs_available_for_popping &= ~(1 << frame_pointer);
26296 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26298 if (regs_available_for_popping)
26300 int stack_pointer;
26302 /* We popped the stack pointer as well,
26303 find the register that contains it. */
26304 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26306 /* Move it into the stack register. */
26307 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26309 /* At this point we have popped all necessary registers, so
26310 do not worry about restoring regs_available_for_popping
26311 to its correct value:
26313 assert (pops_needed == 0)
26314 assert (regs_available_for_popping == (1 << frame_pointer))
26315 assert (regs_to_pop == (1 << STACK_POINTER)) */
26317 else
26319 /* Since we have just moved the popped value into the frame
26320 pointer, the popping register is available for reuse, and
26321 we know that we still have the stack pointer left to pop. */
26322 regs_available_for_popping |= (1 << frame_pointer);
26326 /* If we still have registers left on the stack, but we no longer have
26327 any registers into which we can pop them, then we must move the return
26328 address into the link register and make available the register that
26329 contained it. */
26330 if (regs_available_for_popping == 0 && pops_needed > 0)
26332 regs_available_for_popping |= 1 << reg_containing_return_addr;
26334 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26335 reg_containing_return_addr);
26337 reg_containing_return_addr = LR_REGNUM;
26340 /* If we have registers left on the stack then pop some more.
26341 We know that at most we will want to pop FP and SP. */
26342 if (pops_needed > 0)
26344 int popped_into;
26345 int move_to;
26347 thumb_pop (f, regs_available_for_popping);
26349 /* We have popped either FP or SP.
26350 Move whichever one it is into the correct register. */
26351 popped_into = number_of_first_bit_set (regs_available_for_popping);
26352 move_to = number_of_first_bit_set (regs_to_pop);
26354 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26356 regs_to_pop &= ~(1 << move_to);
26358 --pops_needed;
26361 /* If we still have not popped everything then we must have only
26362 had one register available to us and we are now popping the SP. */
26363 if (pops_needed > 0)
26365 int popped_into;
26367 thumb_pop (f, regs_available_for_popping);
26369 popped_into = number_of_first_bit_set (regs_available_for_popping);
26371 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26373 assert (regs_to_pop == (1 << STACK_POINTER))
26374 assert (pops_needed == 1)
26378 /* If necessary restore the a4 register. */
26379 if (restore_a4)
26381 if (reg_containing_return_addr != LR_REGNUM)
26383 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26384 reg_containing_return_addr = LR_REGNUM;
26387 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26390 if (crtl->calls_eh_return)
26391 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26393 /* Return to caller. */
26394 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26397 /* Scan INSN just before assembler is output for it.
26398 For Thumb-1, we track the status of the condition codes; this
26399 information is used in the cbranchsi4_insn pattern. */
26400 void
26401 thumb1_final_prescan_insn (rtx insn)
26403 if (flag_print_asm_name)
26404 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26405 INSN_ADDRESSES (INSN_UID (insn)));
26406 /* Don't overwrite the previous setter when we get to a cbranch. */
26407 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26409 enum attr_conds conds;
26411 if (cfun->machine->thumb1_cc_insn)
26413 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26414 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26415 CC_STATUS_INIT;
26417 conds = get_attr_conds (insn);
26418 if (conds == CONDS_SET)
26420 rtx set = single_set (insn);
26421 cfun->machine->thumb1_cc_insn = insn;
26422 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26423 cfun->machine->thumb1_cc_op1 = const0_rtx;
26424 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26425 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26427 rtx src1 = XEXP (SET_SRC (set), 1);
26428 if (src1 == const0_rtx)
26429 cfun->machine->thumb1_cc_mode = CCmode;
26431 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26433 /* Record the src register operand instead of dest because
26434 the cprop_hardreg pass propagates src. */
26435 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26438 else if (conds != CONDS_NOCOND)
26439 cfun->machine->thumb1_cc_insn = NULL_RTX;
26442 /* Check if an unexpected far jump is used. */
26443 if (cfun->machine->lr_save_eliminated
26444 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26445 internal_error ("Unexpected thumb1 far jump");
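/* Return 1 if the set bits of VAL (viewed as a 32-bit value) all fall
   within a single 8-bit field shifted left by 0 to 24 bits; return 0
   otherwise, including for VAL == 0. */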
26449 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26451 unsigned HOST_WIDE_INT mask = 0xff;
26452 int i;
26454 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26455 if (val == 0) /* XXX */
26456 return 0;
26458 for (i = 0; i < 25; i++)
26459 if ((val & (mask << i)) == val)
26460 return 1;
26462 return 0;
26465 /* Returns nonzero if the current function contains,
26466 or might contain, a far jump. */
26467 static int
26468 thumb_far_jump_used_p (void)
26470 rtx insn;
26471 bool far_jump = false;
26472 unsigned int func_size = 0;
26474 /* This test is only important for leaf functions. */
26475 /* assert (!leaf_function_p ()); */
26477 /* If we have already decided that far jumps may be used,
26478 do not bother checking again, and always return true even if
26479 it turns out that they are not being used. Once we have made
26480 the decision that far jumps are present (and that hence the link
26481 register will be pushed onto the stack) we cannot go back on it. */
26482 if (cfun->machine->far_jump_used)
26483 return 1;
26485 /* If this function is not being called from the prologue/epilogue
26486 generation code then it must be being called from the
26487 INITIAL_ELIMINATION_OFFSET macro. */
26488 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26490 /* In this case we know that we are being asked about the elimination
26491 of the arg pointer register. If that register is not being used,
26492 then there are no arguments on the stack, and we do not have to
26493 worry that a far jump might force the prologue to push the link
26494 register, changing the stack offsets. In this case we can just
26495 return false, since the presence of far jumps in the function will
26496 not affect stack offsets.
26498 If the arg pointer is live (or if it was live, but has now been
26499 eliminated and so set to dead) then we do have to test to see if
26500 the function might contain a far jump. This test can lead to some
26501 false positives, since before reload is completed the length of
26502 branch instructions is not known, so gcc defaults to returning their
26503 longest length, which in turn sets the far jump attribute to true.
26505 A false positive will not result in bad code being generated, but it
26506 will result in a needless push and pop of the link register. We
26507 hope that this does not occur too often.
26509 If we need doubleword stack alignment this could affect the other
26510 elimination offsets so we can't risk getting it wrong. */
26511 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26512 cfun->machine->arg_pointer_live = 1;
26513 else if (!cfun->machine->arg_pointer_live)
26514 return 0;
26517 /* We should not change far_jump_used during or after reload, as there is
26518 no chance to change stack frame layout. */
26519 if (reload_in_progress || reload_completed)
26520 return 0;
26522 /* Check to see if the function contains a branch
26523 insn with the far jump attribute set. */
26524 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26526 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26528 far_jump = true;
26530 func_size += get_attr_length (insn);
26533 /* The far_jump attribute will always be true for thumb1 before the
26534 shorten_branch pass, so checking the far_jump attribute before
26535 that pass is not very useful.
26537 The following heuristic tries to estimate more accurately whether a far
26538 jump may finally be used. The heuristic is very conservative, as there
26539 is no chance to roll back a decision not to use a far jump.
26541 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26542 that each 2-byte insn is associated with a 4-byte constant pool. Using
26543 a function size of 2048/3 as the threshold is conservative enough. */
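/* Under that worst-case assumption, a function whose instructions total S
   bytes can occupy roughly 3*S bytes once constant pools are counted, so
   requiring 3*S < 2048 (S no more than about 682 bytes) keeps every branch
   within range; e.g. 700 bytes of instructions could expand to 2100 bytes,
   which already exceeds the roughly 2048-byte reach. */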
26544 if (far_jump)
26546 if ((func_size * 3) >= 2048)
26548 /* Record the fact that we have decided that
26549 the function does use far jumps. */
26550 cfun->machine->far_jump_used = 1;
26551 return 1;
26555 return 0;
26558 /* Return nonzero if FUNC must be entered in ARM mode. */
26560 is_called_in_ARM_mode (tree func)
26562 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26564 /* Ignore the problem about functions whose address is taken. */
26565 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26566 return TRUE;
26568 #ifdef ARM_PE
26569 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26570 #else
26571 return FALSE;
26572 #endif
26575 /* Given the stack offsets and register mask in OFFSETS, decide how
26576 many additional registers to push instead of subtracting a constant
26577 from SP. For epilogues the principle is the same except we use pop.
26578 FOR_PROLOGUE indicates which we're generating. */
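/* For example (illustrative numbers): the Thumb-1 "sub sp, #imm"
   encoding only accepts immediates up to 508, so a frame of exactly 512
   bytes would otherwise need the adjustment loaded into a register;
   pushing one extra dead or call-clobbered low register covers 4 of
   those bytes and lets the remaining 508 fit in the immediate, saving an
   instruction. */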
26579 static int
26580 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26582 HOST_WIDE_INT amount;
26583 unsigned long live_regs_mask = offsets->saved_regs_mask;
26584 /* Extract a mask of the ones we can give to the Thumb's push/pop
26585 instruction. */
26586 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26587 /* Then count how many other high registers will need to be pushed. */
26588 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26589 int n_free, reg_base, size;
26591 if (!for_prologue && frame_pointer_needed)
26592 amount = offsets->locals_base - offsets->saved_regs;
26593 else
26594 amount = offsets->outgoing_args - offsets->saved_regs;
26596 /* If the stack frame size is 512 exactly, we can save one load
26597 instruction, which should make this a win even when optimizing
26598 for speed. */
26599 if (!optimize_size && amount != 512)
26600 return 0;
26602 /* Can't do this if there are high registers to push. */
26603 if (high_regs_pushed != 0)
26604 return 0;
26606 /* Shouldn't do it in the prologue if no registers would normally
26607 be pushed at all. In the epilogue, also allow it if we'll have
26608 a pop insn for the PC. */
26609 if (l_mask == 0
26610 && (for_prologue
26611 || TARGET_BACKTRACE
26612 || (live_regs_mask & 1 << LR_REGNUM) == 0
26613 || TARGET_INTERWORK
26614 || crtl->args.pretend_args_size != 0))
26615 return 0;
26617 /* Don't do this if thumb_expand_prologue wants to emit instructions
26618 between the push and the stack frame allocation. */
26619 if (for_prologue
26620 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26621 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26622 return 0;
26624 reg_base = 0;
26625 n_free = 0;
26626 if (!for_prologue)
26628 size = arm_size_return_regs ();
26629 reg_base = ARM_NUM_INTS (size);
26630 live_regs_mask >>= reg_base;
26633 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26634 && (for_prologue || call_used_regs[reg_base + n_free]))
26636 live_regs_mask >>= 1;
26637 n_free++;
26640 if (n_free == 0)
26641 return 0;
26642 gcc_assert (amount / 4 * 4 == amount);
26644 if (amount >= 512 && (amount - n_free * 4) < 512)
26645 return (amount - 508) / 4;
26646 if (amount <= n_free * 4)
26647 return amount / 4;
26648 return 0;
26651 /* The bits which aren't usefully expanded as rtl. */
26652 const char *
26653 thumb1_unexpanded_epilogue (void)
26655 arm_stack_offsets *offsets;
26656 int regno;
26657 unsigned long live_regs_mask = 0;
26658 int high_regs_pushed = 0;
26659 int extra_pop;
26660 int had_to_push_lr;
26661 int size;
26663 if (cfun->machine->return_used_this_function != 0)
26664 return "";
26666 if (IS_NAKED (arm_current_func_type ()))
26667 return "";
26669 offsets = arm_get_frame_offsets ();
26670 live_regs_mask = offsets->saved_regs_mask;
26671 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26673 /* We can deduce the registers used from the function's return value.
26674 This is more reliable than examining df_regs_ever_live_p () because that
26675 will be set if the register is ever used in the function, not just if
26676 the register is used to hold a return value. */
26677 size = arm_size_return_regs ();
26679 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26680 if (extra_pop > 0)
26682 unsigned long extra_mask = (1 << extra_pop) - 1;
26683 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26686 /* The prologue may have pushed some high registers to use as
26687 work registers. E.g. the testsuite file:
26688 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26689 compiles to produce:
26690 push {r4, r5, r6, r7, lr}
26691 mov r7, r9
26692 mov r6, r8
26693 push {r6, r7}
26694 as part of the prologue. We have to undo that pushing here. */
26696 if (high_regs_pushed)
26698 unsigned long mask = live_regs_mask & 0xff;
26699 int next_hi_reg;
26701 /* The available low registers depend on the size of the value we are
26702 returning. */
26703 if (size <= 12)
26704 mask |= 1 << 3;
26705 if (size <= 8)
26706 mask |= 1 << 2;
26708 if (mask == 0)
26709 /* Oh dear! We have no low registers into which we can pop
26710 high registers! */
26711 internal_error
26712 ("no low registers available for popping high registers");
26714 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26715 if (live_regs_mask & (1 << next_hi_reg))
26716 break;
26718 while (high_regs_pushed)
26720 /* Find lo register(s) into which the high register(s) can
26721 be popped. */
26722 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26724 if (mask & (1 << regno))
26725 high_regs_pushed--;
26726 if (high_regs_pushed == 0)
26727 break;
26730 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26732 /* Pop the values into the low register(s). */
26733 thumb_pop (asm_out_file, mask);
26735 /* Move the value(s) into the high registers. */
26736 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26738 if (mask & (1 << regno))
26740 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26741 regno);
26743 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26744 if (live_regs_mask & (1 << next_hi_reg))
26745 break;
26749 live_regs_mask &= ~0x0f00;
26752 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26753 live_regs_mask &= 0xff;
26755 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26757 /* Pop the return address into the PC. */
26758 if (had_to_push_lr)
26759 live_regs_mask |= 1 << PC_REGNUM;
26761 /* Either no argument registers were pushed or a backtrace
26762 structure was created which includes an adjusted stack
26763 pointer, so just pop everything. */
26764 if (live_regs_mask)
26765 thumb_pop (asm_out_file, live_regs_mask);
26767 /* We have either just popped the return address into the
26768 PC or it was kept in LR for the entire function.
26769 Note that thumb_pop has already called thumb_exit if the
26770 PC was in the list. */
26771 if (!had_to_push_lr)
26772 thumb_exit (asm_out_file, LR_REGNUM);
26774 else
26776 /* Pop everything but the return address. */
26777 if (live_regs_mask)
26778 thumb_pop (asm_out_file, live_regs_mask);
26780 if (had_to_push_lr)
26782 if (size > 12)
26784 /* We have no free low regs, so save one. */
26785 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26786 LAST_ARG_REGNUM);
26789 /* Get the return address into a temporary register. */
26790 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26792 if (size > 12)
26794 /* Move the return address to lr. */
26795 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26796 LAST_ARG_REGNUM);
26797 /* Restore the low register. */
26798 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26799 IP_REGNUM);
26800 regno = LR_REGNUM;
26802 else
26803 regno = LAST_ARG_REGNUM;
26805 else
26806 regno = LR_REGNUM;
26808 /* Remove the argument registers that were pushed onto the stack. */
26809 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26810 SP_REGNUM, SP_REGNUM,
26811 crtl->args.pretend_args_size);
26813 thumb_exit (asm_out_file, regno);
26816 return "";
26819 /* Functions to save and restore machine-specific function data. */
26820 static struct machine_function *
26821 arm_init_machine_status (void)
26823 struct machine_function *machine;
26824 machine = ggc_alloc_cleared_machine_function ();
26826 #if ARM_FT_UNKNOWN != 0
26827 machine->func_type = ARM_FT_UNKNOWN;
26828 #endif
26829 return machine;
26832 /* Return an RTX indicating where the return address to the
26833 calling function can be found. */
26835 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26837 if (count != 0)
26838 return NULL_RTX;
26840 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26843 /* Do anything needed before RTL is emitted for each function. */
26844 void
26845 arm_init_expanders (void)
26847 /* Arrange to initialize and mark the machine per-function status. */
26848 init_machine_status = arm_init_machine_status;
26850 /* This is to stop the combine pass optimizing away the alignment
26851 adjustment of va_arg. */
26852 /* ??? It is claimed that this should not be necessary. */
26853 if (cfun)
26854 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26858 /* Like arm_compute_initial_elimination_offset. Simpler because there
26859 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26860 to point at the base of the local variables after static stack
26861 space for a function has been allocated. */
26863 HOST_WIDE_INT
26864 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26866 arm_stack_offsets *offsets;
26868 offsets = arm_get_frame_offsets ();
26870 switch (from)
26872 case ARG_POINTER_REGNUM:
26873 switch (to)
26875 case STACK_POINTER_REGNUM:
26876 return offsets->outgoing_args - offsets->saved_args;
26878 case FRAME_POINTER_REGNUM:
26879 return offsets->soft_frame - offsets->saved_args;
26881 case ARM_HARD_FRAME_POINTER_REGNUM:
26882 return offsets->saved_regs - offsets->saved_args;
26884 case THUMB_HARD_FRAME_POINTER_REGNUM:
26885 return offsets->locals_base - offsets->saved_args;
26887 default:
26888 gcc_unreachable ();
26890 break;
26892 case FRAME_POINTER_REGNUM:
26893 switch (to)
26895 case STACK_POINTER_REGNUM:
26896 return offsets->outgoing_args - offsets->soft_frame;
26898 case ARM_HARD_FRAME_POINTER_REGNUM:
26899 return offsets->saved_regs - offsets->soft_frame;
26901 case THUMB_HARD_FRAME_POINTER_REGNUM:
26902 return offsets->locals_base - offsets->soft_frame;
26904 default:
26905 gcc_unreachable ();
26907 break;
26909 default:
26910 gcc_unreachable ();
26914 /* Generate the function's prologue. */
26916 void
26917 thumb1_expand_prologue (void)
26919 rtx insn;
26921 HOST_WIDE_INT amount;
26922 arm_stack_offsets *offsets;
26923 unsigned long func_type;
26924 int regno;
26925 unsigned long live_regs_mask;
26926 unsigned long l_mask;
26927 unsigned high_regs_pushed = 0;
26929 func_type = arm_current_func_type ();
26931 /* Naked functions don't have prologues. */
26932 if (IS_NAKED (func_type))
26933 return;
26935 if (IS_INTERRUPT (func_type))
26937 error ("interrupt Service Routines cannot be coded in Thumb mode");
26938 return;
26941 if (is_called_in_ARM_mode (current_function_decl))
26942 emit_insn (gen_prologue_thumb1_interwork ());
26944 offsets = arm_get_frame_offsets ();
26945 live_regs_mask = offsets->saved_regs_mask;
26947 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26948 l_mask = live_regs_mask & 0x40ff;
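/* 0x40ff covers r0-r7 (bits 0-7) and LR (bit 14), the registers a
   Thumb-1 PUSH can encode.  */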
26949 /* Then count how many other high registers will need to be pushed. */
26950 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26952 if (crtl->args.pretend_args_size)
26954 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26956 if (cfun->machine->uses_anonymous_args)
26958 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26959 unsigned long mask;
26961 mask = 1ul << (LAST_ARG_REGNUM + 1);
26962 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
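/* E.g. if two argument registers need pushing, this yields the mask for
   the top two argument registers, {r2, r3}.  */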
26964 insn = thumb1_emit_multi_reg_push (mask, 0);
26966 else
26968 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26969 stack_pointer_rtx, x));
26971 RTX_FRAME_RELATED_P (insn) = 1;
26974 if (TARGET_BACKTRACE)
26976 HOST_WIDE_INT offset = 0;
26977 unsigned work_register;
26978 rtx work_reg, x, arm_hfp_rtx;
26980 /* We have been asked to create a stack backtrace structure.
26981 The code looks like this:
26983 0 .align 2
26984 0 func:
26985 0 sub SP, #16 Reserve space for 4 registers.
26986 2 push {R7} Push low registers.
26987 4 add R7, SP, #20 Get the stack pointer before the push.
26988 6 str R7, [SP, #8] Store the stack pointer
26989 (before reserving the space).
26990 8 mov R7, PC Get hold of the start of this code + 12.
26991 10 str R7, [SP, #16] Store it.
26992 12 mov R7, FP Get hold of the current frame pointer.
26993 14 str R7, [SP, #4] Store it.
26994 16 mov R7, LR Get hold of the current return address.
26995 18 str R7, [SP, #12] Store it.
26996 20 add R7, SP, #16 Point at the start of the
26997 backtrace structure.
26998 22 mov FP, R7 Put this value into the frame pointer. */
27000 work_register = thumb_find_work_register (live_regs_mask);
27001 work_reg = gen_rtx_REG (SImode, work_register);
27002 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27004 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27005 stack_pointer_rtx, GEN_INT (-16)));
27006 RTX_FRAME_RELATED_P (insn) = 1;
27008 if (l_mask)
27010 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27011 RTX_FRAME_RELATED_P (insn) = 1;
27013 offset = bit_count (l_mask) * UNITS_PER_WORD;
27016 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27017 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27019 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27020 x = gen_frame_mem (SImode, x);
27021 emit_move_insn (x, work_reg);
27023 /* Make sure that the instruction fetching the PC is in the right place
27024 to calculate "start of backtrace creation code + 12". */
27025 /* ??? The stores using the common WORK_REG ought to be enough to
27026 prevent the scheduler from doing anything weird. Failing that
27027 we could always move all of the following into an UNSPEC_VOLATILE. */
27028 if (l_mask)
27030 x = gen_rtx_REG (SImode, PC_REGNUM);
27031 emit_move_insn (work_reg, x);
27033 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27034 x = gen_frame_mem (SImode, x);
27035 emit_move_insn (x, work_reg);
27037 emit_move_insn (work_reg, arm_hfp_rtx);
27039 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27040 x = gen_frame_mem (SImode, x);
27041 emit_move_insn (x, work_reg);
27043 else
27045 emit_move_insn (work_reg, arm_hfp_rtx);
27047 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27048 x = gen_frame_mem (SImode, x);
27049 emit_move_insn (x, work_reg);
27051 x = gen_rtx_REG (SImode, PC_REGNUM);
27052 emit_move_insn (work_reg, x);
27054 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27055 x = gen_frame_mem (SImode, x);
27056 emit_move_insn (x, work_reg);
27059 x = gen_rtx_REG (SImode, LR_REGNUM);
27060 emit_move_insn (work_reg, x);
27062 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27063 x = gen_frame_mem (SImode, x);
27064 emit_move_insn (x, work_reg);
27066 x = GEN_INT (offset + 12);
27067 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27069 emit_move_insn (arm_hfp_rtx, work_reg);
27071 /* Optimization: If we are not pushing any low registers but we are going
27072 to push some high registers then delay our first push. This will just
27073 be a push of LR and we can combine it with the push of the first high
27074 register. */
27075 else if ((l_mask & 0xff) != 0
27076 || (high_regs_pushed == 0 && l_mask))
27078 unsigned long mask = l_mask;
27079 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27080 insn = thumb1_emit_multi_reg_push (mask, mask);
27081 RTX_FRAME_RELATED_P (insn) = 1;
27084 if (high_regs_pushed)
27086 unsigned pushable_regs;
27087 unsigned next_hi_reg;
27088 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27089 : crtl->args.info.nregs;
27090 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27092 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27093 if (live_regs_mask & (1 << next_hi_reg))
27094 break;
27096 /* Here we need to mask out registers used for passing arguments, even
27097 if they could otherwise be pushed. This avoids using them to stash the high
27098 registers; such a stash could clobber argument values that are still live. */
27099 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27101 if (pushable_regs == 0)
27102 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27104 while (high_regs_pushed > 0)
27106 unsigned long real_regs_mask = 0;
27108 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27110 if (pushable_regs & (1 << regno))
27112 emit_move_insn (gen_rtx_REG (SImode, regno),
27113 gen_rtx_REG (SImode, next_hi_reg));
27115 high_regs_pushed --;
27116 real_regs_mask |= (1 << next_hi_reg);
27118 if (high_regs_pushed)
27120 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27121 next_hi_reg --)
27122 if (live_regs_mask & (1 << next_hi_reg))
27123 break;
27125 else
27127 pushable_regs &= ~((1 << regno) - 1);
27128 break;
27133 /* If we had to find a work register and we have not yet
27134 saved the LR then add it to the list of regs to push. */
27135 if (l_mask == (1 << LR_REGNUM))
27137 pushable_regs |= l_mask;
27138 real_regs_mask |= l_mask;
27139 l_mask = 0;
27142 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27143 RTX_FRAME_RELATED_P (insn) = 1;
27147 /* Load the pic register before setting the frame pointer,
27148 so we can use r7 as a temporary work register. */
27149 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27150 arm_load_pic_register (live_regs_mask);
27152 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27153 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27154 stack_pointer_rtx);
27156 if (flag_stack_usage_info)
27157 current_function_static_stack_size
27158 = offsets->outgoing_args - offsets->saved_args;
27160 amount = offsets->outgoing_args - offsets->saved_regs;
27161 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27162 if (amount)
27164 if (amount < 512)
27166 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27167 GEN_INT (- amount)));
27168 RTX_FRAME_RELATED_P (insn) = 1;
27170 else
27172 rtx reg, dwarf;
27174 /* The stack decrement is too big for an immediate value in a single
27175 insn. In theory we could issue multiple subtracts, but after
27176 three of them it becomes more space efficient to place the full
27177 value in the constant pool and load into a register. (Also the
27178 ARM debugger really likes to see only one stack decrement per
27179 function). So instead we look for a scratch register into which
27180 we can load the decrement, and then we subtract this from the
27181 stack pointer. Unfortunately on the thumb the only available
27182 scratch registers are the argument registers, and we cannot use
27183 these as they may hold arguments to the function. Instead we
27184 attempt to locate a call preserved register which is used by this
27185 function. If we can find one, then we know that it will have
27186 been pushed at the start of the prologue and so we can corrupt
27187 it now. */
27188 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27189 if (live_regs_mask & (1 << regno))
27190 break;
27192 gcc_assert(regno <= LAST_LO_REGNUM);
27194 reg = gen_rtx_REG (SImode, regno);
27196 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27198 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27199 stack_pointer_rtx, reg));
27201 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27202 plus_constant (Pmode, stack_pointer_rtx,
27203 -amount));
27204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27205 RTX_FRAME_RELATED_P (insn) = 1;
27209 if (frame_pointer_needed)
27210 thumb_set_frame_pointer (offsets);
27212 /* If we are profiling, make sure no instructions are scheduled before
27213 the call to mcount. Similarly if the user has requested no
27214 scheduling in the prolog. Similarly if we want non-call exceptions
27215 using the EABI unwinder, to prevent faulting instructions from being
27216 swapped with a stack adjustment. */
27217 if (crtl->profile || !TARGET_SCHED_PROLOG
27218 || (arm_except_unwind_info (&global_options) == UI_TARGET
27219 && cfun->can_throw_non_call_exceptions))
27220 emit_insn (gen_blockage ());
27222 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27223 if (live_regs_mask & 0xff)
27224 cfun->machine->lr_save_eliminated = 0;
27227 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27228 POP instruction can be generated. LR should be replaced by PC. All
27229 the checks required are already done by USE_RETURN_INSN (). Hence,
27230 all we really need to check here is whether a single register or
27231 multiple registers are to be popped. */
27232 void
27233 thumb2_expand_return (bool simple_return)
27235 int i, num_regs;
27236 unsigned long saved_regs_mask;
27237 arm_stack_offsets *offsets;
27239 offsets = arm_get_frame_offsets ();
27240 saved_regs_mask = offsets->saved_regs_mask;
27242 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27243 if (saved_regs_mask & (1 << i))
27244 num_regs++;
27246 if (!simple_return && saved_regs_mask)
27248 if (num_regs == 1)
27250 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27251 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27252 rtx addr = gen_rtx_MEM (SImode,
27253 gen_rtx_POST_INC (SImode,
27254 stack_pointer_rtx));
27255 set_mem_alias_set (addr, get_frame_alias_set ());
27256 XVECEXP (par, 0, 0) = ret_rtx;
27257 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27258 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27259 emit_jump_insn (par);
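/* Semantically this pops the saved return address straight into the PC
   with a post-incremented SP, e.g. a single "ldr pc, [sp], #4".  */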
27261 else
27263 saved_regs_mask &= ~ (1 << LR_REGNUM);
27264 saved_regs_mask |= (1 << PC_REGNUM);
27265 arm_emit_multi_reg_pop (saved_regs_mask);
27268 else
27270 emit_jump_insn (simple_return_rtx);
27274 void
27275 thumb1_expand_epilogue (void)
27277 HOST_WIDE_INT amount;
27278 arm_stack_offsets *offsets;
27279 int regno;
27281 /* Naked functions don't have epilogues. */
27282 if (IS_NAKED (arm_current_func_type ()))
27283 return;
27285 offsets = arm_get_frame_offsets ();
27286 amount = offsets->outgoing_args - offsets->saved_regs;
27288 if (frame_pointer_needed)
27290 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27291 amount = offsets->locals_base - offsets->saved_regs;
27293 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27295 gcc_assert (amount >= 0);
27296 if (amount)
27298 emit_insn (gen_blockage ());
27300 if (amount < 512)
27301 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27302 GEN_INT (amount)));
27303 else
27305 /* r3 is always free in the epilogue. */
27306 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27308 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27309 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27313 /* Emit a USE (stack_pointer_rtx), so that
27314 the stack adjustment will not be deleted. */
27315 emit_insn (gen_force_register_use (stack_pointer_rtx));
27317 if (crtl->profile || !TARGET_SCHED_PROLOG)
27318 emit_insn (gen_blockage ());
27320 /* Emit a clobber for each insn that will be restored in the epilogue,
27321 so that flow2 will get register lifetimes correct. */
27322 for (regno = 0; regno < 13; regno++)
27323 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27324 emit_clobber (gen_rtx_REG (SImode, regno));
27326 if (! df_regs_ever_live_p (LR_REGNUM))
27327 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27330 /* Epilogue code for APCS frame. */
27331 static void
27332 arm_expand_epilogue_apcs_frame (bool really_return)
27334 unsigned long func_type;
27335 unsigned long saved_regs_mask;
27336 int num_regs = 0;
27337 int i;
27338 int floats_from_frame = 0;
27339 arm_stack_offsets *offsets;
27341 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27342 func_type = arm_current_func_type ();
27344 /* Get frame offsets for ARM. */
27345 offsets = arm_get_frame_offsets ();
27346 saved_regs_mask = offsets->saved_regs_mask;
27348 /* Find the offset of the floating-point save area in the frame. */
27349 floats_from_frame
27350 = (offsets->saved_args
27351 + arm_compute_static_chain_stack_bytes ()
27352 - offsets->frame);
27354 /* Compute how many core registers saved and how far away the floats are. */
27355 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27356 if (saved_regs_mask & (1 << i))
27358 num_regs++;
27359 floats_from_frame += 4;
27362 if (TARGET_HARD_FLOAT && TARGET_VFP)
27364 int start_reg;
27365 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27367 /* The offset is from IP_REGNUM. */
27368 int saved_size = arm_get_vfp_saved_size ();
27369 if (saved_size > 0)
27371 rtx insn;
27372 floats_from_frame += saved_size;
27373 insn = emit_insn (gen_addsi3 (ip_rtx,
27374 hard_frame_pointer_rtx,
27375 GEN_INT (-floats_from_frame)));
27376 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27377 ip_rtx, hard_frame_pointer_rtx);
27380 /* Generate VFP register multi-pop. */
27381 start_reg = FIRST_VFP_REGNUM;
27383 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27384 /* Look for a case where a reg does not need restoring. */
27385 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27386 && (!df_regs_ever_live_p (i + 1)
27387 || call_used_regs[i + 1]))
27389 if (start_reg != i)
27390 arm_emit_vfp_multi_reg_pop (start_reg,
27391 (i - start_reg) / 2,
27392 gen_rtx_REG (SImode,
27393 IP_REGNUM));
27394 start_reg = i + 2;
27397 /* Restore the remaining regs that we have discovered (or possibly
27398 even all of them, if the conditional in the for loop never
27399 fired). */
27400 if (start_reg != i)
27401 arm_emit_vfp_multi_reg_pop (start_reg,
27402 (i - start_reg) / 2,
27403 gen_rtx_REG (SImode, IP_REGNUM));
27406 if (TARGET_IWMMXT)
27408 /* The frame pointer is guaranteed to be non-double-word aligned, as
27409 it is set to the double-word-aligned old_stack_pointer - 4. */
27410 rtx insn;
27411 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27413 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27414 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27416 rtx addr = gen_frame_mem (V2SImode,
27417 plus_constant (Pmode, hard_frame_pointer_rtx,
27418 - lrm_count * 4));
27419 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27420 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27421 gen_rtx_REG (V2SImode, i),
27422 NULL_RTX);
27423 lrm_count += 2;
27427 /* saved_regs_mask should contain IP, which holds the old stack pointer
27428 saved when the activation record was created. Since SP and IP are adjacent
27429 registers, we can restore the value directly into SP. */
27430 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27431 saved_regs_mask &= ~(1 << IP_REGNUM);
27432 saved_regs_mask |= (1 << SP_REGNUM);
27434 /* There are two registers left in saved_regs_mask - LR and PC. We
27435 only need to restore LR (the return address), but to
27436 save time we can load it directly into PC, unless we need a
27437 special function exit sequence, or we are not really returning. */
27438 if (really_return
27439 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27440 && !crtl->calls_eh_return)
27441 /* Delete LR from the register mask, so that LR on
27442 the stack is loaded into the PC in the register mask. */
27443 saved_regs_mask &= ~(1 << LR_REGNUM);
27444 else
27445 saved_regs_mask &= ~(1 << PC_REGNUM);
27447 num_regs = bit_count (saved_regs_mask);
27448 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27450 rtx insn;
27451 emit_insn (gen_blockage ());
27452 /* Unwind the stack to just below the saved registers. */
27453 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27454 hard_frame_pointer_rtx,
27455 GEN_INT (- 4 * num_regs)));
27457 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27458 stack_pointer_rtx, hard_frame_pointer_rtx);
27461 arm_emit_multi_reg_pop (saved_regs_mask);
27463 if (IS_INTERRUPT (func_type))
27465 /* Interrupt handlers will have pushed the
27466 IP onto the stack, so restore it now. */
27467 rtx insn;
27468 rtx addr = gen_rtx_MEM (SImode,
27469 gen_rtx_POST_INC (SImode,
27470 stack_pointer_rtx));
27471 set_mem_alias_set (addr, get_frame_alias_set ());
27472 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27473 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27474 gen_rtx_REG (SImode, IP_REGNUM),
27475 NULL_RTX);
27478 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27479 return;
27481 if (crtl->calls_eh_return)
27482 emit_insn (gen_addsi3 (stack_pointer_rtx,
27483 stack_pointer_rtx,
27484 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27486 if (IS_STACKALIGN (func_type))
27487 /* Restore the original stack pointer. Before prologue, the stack was
27488 realigned and the original stack pointer saved in r0. For details,
27489 see comment in arm_expand_prologue. */
27490 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27492 emit_jump_insn (simple_return_rtx);
27495 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27496 function is not a sibcall. */
27497 void
27498 arm_expand_epilogue (bool really_return)
27500 unsigned long func_type;
27501 unsigned long saved_regs_mask;
27502 int num_regs = 0;
27503 int i;
27504 int amount;
27505 arm_stack_offsets *offsets;
27507 func_type = arm_current_func_type ();
27509 /* Naked functions don't have an epilogue. Hence, generate the return pattern
27510 and let output_return_instruction take care of any instruction emission. */
27511 if (IS_NAKED (func_type)
27512 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27514 if (really_return)
27515 emit_jump_insn (simple_return_rtx);
27516 return;
27519 /* If we are throwing an exception, then we really must be doing a
27520 return, so we can't tail-call. */
27521 gcc_assert (!crtl->calls_eh_return || really_return);
27523 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27525 arm_expand_epilogue_apcs_frame (really_return);
27526 return;
27529 /* Get frame offsets for ARM. */
27530 offsets = arm_get_frame_offsets ();
27531 saved_regs_mask = offsets->saved_regs_mask;
27532 num_regs = bit_count (saved_regs_mask);
27534 if (frame_pointer_needed)
27536 rtx insn;
27537 /* Restore stack pointer if necessary. */
27538 if (TARGET_ARM)
27540 /* In ARM mode, the frame pointer points to the first saved register.
27541 Restore the stack pointer to the last saved register. */
27542 amount = offsets->frame - offsets->saved_regs;
27544 /* Force out any pending memory operations that reference stacked data
27545 before stack de-allocation occurs. */
27546 emit_insn (gen_blockage ());
27547 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27548 hard_frame_pointer_rtx,
27549 GEN_INT (amount)));
27550 arm_add_cfa_adjust_cfa_note (insn, amount,
27551 stack_pointer_rtx,
27552 hard_frame_pointer_rtx);
27554 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27555 deleted. */
27556 emit_insn (gen_force_register_use (stack_pointer_rtx));
27558 else
27560 /* In Thumb-2 mode, the frame pointer points to the last saved
27561 register. */
27562 amount = offsets->locals_base - offsets->saved_regs;
27563 if (amount)
27565 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27566 hard_frame_pointer_rtx,
27567 GEN_INT (amount)));
27568 arm_add_cfa_adjust_cfa_note (insn, amount,
27569 hard_frame_pointer_rtx,
27570 hard_frame_pointer_rtx);
27573 /* Force out any pending memory operations that reference stacked data
27574 before stack de-allocation occurs. */
27575 emit_insn (gen_blockage ());
27576 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27577 hard_frame_pointer_rtx));
27578 arm_add_cfa_adjust_cfa_note (insn, 0,
27579 stack_pointer_rtx,
27580 hard_frame_pointer_rtx);
27581 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27582 deleted. */
27583 emit_insn (gen_force_register_use (stack_pointer_rtx));
27586 else
27588 /* Pop off outgoing args and local frame to adjust stack pointer to
27589 last saved register. */
27590 amount = offsets->outgoing_args - offsets->saved_regs;
27591 if (amount)
27593 rtx tmp;
27594 /* Force out any pending memory operations that reference stacked data
27595 before stack de-allocation occurs. */
27596 emit_insn (gen_blockage ());
27597 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27598 stack_pointer_rtx,
27599 GEN_INT (amount)));
27600 arm_add_cfa_adjust_cfa_note (tmp, amount,
27601 stack_pointer_rtx, stack_pointer_rtx);
27602 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27603 not deleted. */
27604 emit_insn (gen_force_register_use (stack_pointer_rtx));
27608 if (TARGET_HARD_FLOAT && TARGET_VFP)
27610 /* Generate VFP register multi-pop. */
27611 int end_reg = LAST_VFP_REGNUM + 1;
27613 /* Scan the registers in reverse order. We need to match
27614 any groupings made in the prologue and generate matching
27615 vldm operations. Groups must be matched because, unlike pop,
27616 vldm can only restore consecutive registers. */
27617 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27618 /* Look for a case where a reg does not need restoring. */
27619 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27620 && (!df_regs_ever_live_p (i + 1)
27621 || call_used_regs[i + 1]))
27623 /* Restore the regs discovered so far (from reg+2 to
27624 end_reg). */
27625 if (end_reg > i + 2)
27626 arm_emit_vfp_multi_reg_pop (i + 2,
27627 (end_reg - (i + 2)) / 2,
27628 stack_pointer_rtx);
27629 end_reg = i;
27632 /* Restore the remaining regs that we have discovered (or possibly
27633 even all of them, if the conditional in the for loop never
27634 fired). */
27635 if (end_reg > i + 2)
27636 arm_emit_vfp_multi_reg_pop (i + 2,
27637 (end_reg - (i + 2)) / 2,
27638 stack_pointer_rtx);
27641 if (TARGET_IWMMXT)
27642 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27643 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27645 rtx insn;
27646 rtx addr = gen_rtx_MEM (V2SImode,
27647 gen_rtx_POST_INC (SImode,
27648 stack_pointer_rtx));
27649 set_mem_alias_set (addr, get_frame_alias_set ());
27650 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27651 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27652 gen_rtx_REG (V2SImode, i),
27653 NULL_RTX);
27654 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27655 stack_pointer_rtx, stack_pointer_rtx);
27658 if (saved_regs_mask)
27660 rtx insn;
27661 bool return_in_pc = false;
27663 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27664 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27665 && !IS_STACKALIGN (func_type)
27666 && really_return
27667 && crtl->args.pretend_args_size == 0
27668 && saved_regs_mask & (1 << LR_REGNUM)
27669 && !crtl->calls_eh_return)
27671 saved_regs_mask &= ~(1 << LR_REGNUM);
27672 saved_regs_mask |= (1 << PC_REGNUM);
27673 return_in_pc = true;
27676 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27678 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27679 if (saved_regs_mask & (1 << i))
27681 rtx addr = gen_rtx_MEM (SImode,
27682 gen_rtx_POST_INC (SImode,
27683 stack_pointer_rtx));
27684 set_mem_alias_set (addr, get_frame_alias_set ());
27686 if (i == PC_REGNUM)
27688 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27689 XVECEXP (insn, 0, 0) = ret_rtx;
27690 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27691 gen_rtx_REG (SImode, i),
27692 addr);
27693 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27694 insn = emit_jump_insn (insn);
27696 else
27698 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27699 addr));
27700 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27701 gen_rtx_REG (SImode, i),
27702 NULL_RTX);
27703 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27704 stack_pointer_rtx,
27705 stack_pointer_rtx);
27709 else
27711 if (TARGET_LDRD
27712 && current_tune->prefer_ldrd_strd
27713 && !optimize_function_for_size_p (cfun))
27715 if (TARGET_THUMB2)
27716 thumb2_emit_ldrd_pop (saved_regs_mask);
27717 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27718 arm_emit_ldrd_pop (saved_regs_mask);
27719 else
27720 arm_emit_multi_reg_pop (saved_regs_mask);
27722 else
27723 arm_emit_multi_reg_pop (saved_regs_mask);
27726 if (return_in_pc == true)
27727 return;
27730 if (crtl->args.pretend_args_size)
27732 int i, j;
27733 rtx dwarf = NULL_RTX;
27734 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27735 stack_pointer_rtx,
27736 GEN_INT (crtl->args.pretend_args_size)));
27738 RTX_FRAME_RELATED_P (tmp) = 1;
27740 if (cfun->machine->uses_anonymous_args)
27742 /* Restore pretend args. Refer to arm_expand_prologue for how
27743 pretend_args are saved on the stack. */
27744 int num_regs = crtl->args.pretend_args_size / 4;
27745 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
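/* E.g. if two words of pretend args were pushed, this selects {r2, r3}.  */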
27746 for (j = 0, i = 0; j < num_regs; i++)
27747 if (saved_regs_mask & (1 << i))
27749 rtx reg = gen_rtx_REG (SImode, i);
27750 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27751 j++;
27753 REG_NOTES (tmp) = dwarf;
27755 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27756 stack_pointer_rtx, stack_pointer_rtx);
27759 if (!really_return)
27760 return;
27762 if (crtl->calls_eh_return)
27763 emit_insn (gen_addsi3 (stack_pointer_rtx,
27764 stack_pointer_rtx,
27765 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27767 if (IS_STACKALIGN (func_type))
27768 /* Restore the original stack pointer. Before prologue, the stack was
27769 realigned and the original stack pointer saved in r0. For details,
27770 see comment in arm_expand_prologue. */
27771 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27773 emit_jump_insn (simple_return_rtx);
27776 /* Implementation of insn prologue_thumb1_interwork. This is the first
27777 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27779 const char *
27780 thumb1_output_interwork (void)
27782 const char * name;
27783 FILE *f = asm_out_file;
27785 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27786 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27787 == SYMBOL_REF);
27788 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27790 /* Generate code sequence to switch us into Thumb mode. */
27791 /* The .code 32 directive has already been emitted by
27792 ASM_DECLARE_FUNCTION_NAME. */
27793 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27794 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27796 /* Generate a label, so that the debugger will notice the
27797 change in instruction sets. This label is also used by
27798 the assembler to bypass the ARM code when this function
27799 is called from a Thumb encoded function elsewhere in the
27800 same file. Hence the definition of STUB_NAME here must
27801 agree with the definition in gas/config/tc-arm.c. */
27803 #define STUB_NAME ".real_start_of"
27805 fprintf (f, "\t.code\t16\n");
27806 #ifdef ARM_PE
27807 if (arm_dllexport_name_p (name))
27808 name = arm_strip_name_encoding (name);
27809 #endif
27810 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27811 fprintf (f, "\t.thumb_func\n");
27812 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27814 return "";
27817 /* Handle the case of a double word load into a low register from
27818 a computed memory address. The computed address may involve a
27819 register which is overwritten by the load. */
27820 const char *
27821 thumb_load_double_from_address (rtx *operands)
27823 rtx addr;
27824 rtx base;
27825 rtx offset;
27826 rtx arg1;
27827 rtx arg2;
27829 gcc_assert (REG_P (operands[0]));
27830 gcc_assert (MEM_P (operands[1]));
27832 /* Get the memory address. */
27833 addr = XEXP (operands[1], 0);
27835 /* Work out how the memory address is computed. */
27836 switch (GET_CODE (addr))
27838 case REG:
27839 operands[2] = adjust_address (operands[1], SImode, 4);
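/* If the address register doubles as the low destination register, load
   the high word first so the address is not clobbered before the second
   load.  */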
27841 if (REGNO (operands[0]) == REGNO (addr))
27843 output_asm_insn ("ldr\t%H0, %2", operands);
27844 output_asm_insn ("ldr\t%0, %1", operands);
27846 else
27848 output_asm_insn ("ldr\t%0, %1", operands);
27849 output_asm_insn ("ldr\t%H0, %2", operands);
27851 break;
27853 case CONST:
27854 /* Compute <address> + 4 for the high order load. */
27855 operands[2] = adjust_address (operands[1], SImode, 4);
27857 output_asm_insn ("ldr\t%0, %1", operands);
27858 output_asm_insn ("ldr\t%H0, %2", operands);
27859 break;
27861 case PLUS:
27862 arg1 = XEXP (addr, 0);
27863 arg2 = XEXP (addr, 1);
27865 if (CONSTANT_P (arg1))
27866 base = arg2, offset = arg1;
27867 else
27868 base = arg1, offset = arg2;
27870 gcc_assert (REG_P (base));
27872 /* Catch the case of <address> = <reg> + <reg> */
27873 if (REG_P (offset))
27875 int reg_offset = REGNO (offset);
27876 int reg_base = REGNO (base);
27877 int reg_dest = REGNO (operands[0]);
27879 /* Add the base and offset registers together into the
27880 higher destination register. */
27881 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27882 reg_dest + 1, reg_base, reg_offset);
27884 /* Load the lower destination register from the address in
27885 the higher destination register. */
27886 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27887 reg_dest, reg_dest + 1);
27889 /* Load the higher destination register from its own address
27890 plus 4. */
27891 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27892 reg_dest + 1, reg_dest + 1);
27894 else
27896 /* Compute <address> + 4 for the high order load. */
27897 operands[2] = adjust_address (operands[1], SImode, 4);
27899 /* If the computed address is held in the low order register
27900 then load the high order register first, otherwise always
27901 load the low order register first. */
27902 if (REGNO (operands[0]) == REGNO (base))
27904 output_asm_insn ("ldr\t%H0, %2", operands);
27905 output_asm_insn ("ldr\t%0, %1", operands);
27907 else
27909 output_asm_insn ("ldr\t%0, %1", operands);
27910 output_asm_insn ("ldr\t%H0, %2", operands);
27913 break;
27915 case LABEL_REF:
27916 /* With no registers to worry about we can just load the value
27917 directly. */
27918 operands[2] = adjust_address (operands[1], SImode, 4);
27920 output_asm_insn ("ldr\t%H0, %2", operands);
27921 output_asm_insn ("ldr\t%0, %1", operands);
27922 break;
27924 default:
27925 gcc_unreachable ();
27928 return "";
27931 const char *
27932 thumb_output_move_mem_multiple (int n, rtx *operands)
27934 rtx tmp;
27936 switch (n)
27938 case 2:
27939 if (REGNO (operands[4]) > REGNO (operands[5]))
27941 tmp = operands[4];
27942 operands[4] = operands[5];
27943 operands[5] = tmp;
27945 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27946 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27947 break;
27949 case 3:
27950 if (REGNO (operands[4]) > REGNO (operands[5]))
27952 tmp = operands[4];
27953 operands[4] = operands[5];
27954 operands[5] = tmp;
27956 if (REGNO (operands[5]) > REGNO (operands[6]))
27958 tmp = operands[5];
27959 operands[5] = operands[6];
27960 operands[6] = tmp;
27962 if (REGNO (operands[4]) > REGNO (operands[5]))
27964 tmp = operands[4];
27965 operands[4] = operands[5];
27966 operands[5] = tmp;
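/* The swaps above sort operands[4]-operands[6] into ascending register
   order, matching the order in which ldmia/stmia transfer registers.  */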
27969 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27970 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27971 break;
27973 default:
27974 gcc_unreachable ();
27977 return "";
27980 /* Output a call-via instruction for thumb state. */
27981 const char *
27982 thumb_call_via_reg (rtx reg)
27984 int regno = REGNO (reg);
27985 rtx *labelp;
27987 gcc_assert (regno < LR_REGNUM);
27989 /* If we are in the normal text section we can use a single instance
27990 per compilation unit. If we are doing function sections, then we need
27991 an entry per section, since we can't rely on reachability. */
27992 if (in_section == text_section)
27994 thumb_call_reg_needed = 1;
27996 if (thumb_call_via_label[regno] == NULL)
27997 thumb_call_via_label[regno] = gen_label_rtx ();
27998 labelp = thumb_call_via_label + regno;
28000 else
28002 if (cfun->machine->call_via[regno] == NULL)
28003 cfun->machine->call_via[regno] = gen_label_rtx ();
28004 labelp = cfun->machine->call_via + regno;
28007 output_asm_insn ("bl\t%a0", labelp);
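/* The label branched to here is emitted elsewhere (by arm_file_end for the
   text-section case) as a "bx" through the corresponding register.  */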
28008 return "";
28011 /* Routines for generating rtl. */
28012 void
28013 thumb_expand_movmemqi (rtx *operands)
28015 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28016 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28017 HOST_WIDE_INT len = INTVAL (operands[2]);
28018 HOST_WIDE_INT offset = 0;
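/* Copy in decreasing chunk sizes: 12-byte and 8-byte blocks first, then a
   word, halfword and byte tail as needed.  */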
28020 while (len >= 12)
28022 emit_insn (gen_movmem12b (out, in, out, in));
28023 len -= 12;
28026 if (len >= 8)
28028 emit_insn (gen_movmem8b (out, in, out, in));
28029 len -= 8;
28032 if (len >= 4)
28034 rtx reg = gen_reg_rtx (SImode);
28035 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28036 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28037 len -= 4;
28038 offset += 4;
28041 if (len >= 2)
28043 rtx reg = gen_reg_rtx (HImode);
28044 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28045 plus_constant (Pmode, in,
28046 offset))));
28047 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28048 offset)),
28049 reg));
28050 len -= 2;
28051 offset += 2;
28054 if (len)
28056 rtx reg = gen_reg_rtx (QImode);
28057 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28058 plus_constant (Pmode, in,
28059 offset))));
28060 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28061 offset)),
28062 reg));
28066 void
28067 thumb_reload_out_hi (rtx *operands)
28069 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28072 /* Handle reading a half-word from memory during reload. */
28073 void
28074 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28076 gcc_unreachable ();
28079 /* Return the length of a function name prefix
28080 that starts with the character 'c'. */
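/* ARM_NAME_ENCODING_LENGTHS expands (on targets that define it) to a set
   of case labels, each returning the length of one recognised
   name-encoding prefix.  */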
28081 static int
28082 arm_get_strip_length (int c)
28084 switch (c)
28086 ARM_NAME_ENCODING_LENGTHS
28087 default: return 0;
28091 /* Return a pointer to a function's name with any
28092 and all prefix encodings stripped from it. */
28093 const char *
28094 arm_strip_name_encoding (const char *name)
28096 int skip;
28098 while ((skip = arm_get_strip_length (* name)))
28099 name += skip;
28101 return name;
28104 /* If there is a '*' anywhere in the name's prefix, then
28105 emit the stripped name verbatim, otherwise prepend an
28106 underscore if leading underscores are being used. */
28107 void
28108 arm_asm_output_labelref (FILE *stream, const char *name)
28110 int skip;
28111 int verbatim = 0;
28113 while ((skip = arm_get_strip_length (* name)))
28115 verbatim |= (*name == '*');
28116 name += skip;
28119 if (verbatim)
28120 fputs (name, stream);
28121 else
28122 asm_fprintf (stream, "%U%s", name);
28125 /* This function is used to emit an EABI tag and its associated value.
28126 We emit the numerical value of the tag in case the assembler does not
28127 support textual tags. (E.g. gas prior to 2.20.) If requested we include
28128 the tag name in a comment so that anyone reading the assembler output
28129 will know which tag is being set.
28131 This function is not static because arm-c.c needs it too. */
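/* For example, with -fverbose-asm the output typically looks like:
       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals  */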
28133 void
28134 arm_emit_eabi_attribute (const char *name, int num, int val)
28136 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28137 if (flag_verbose_asm || flag_debug_asm)
28138 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28139 asm_fprintf (asm_out_file, "\n");
28142 static void
28143 arm_file_start (void)
28145 int val;
28147 if (TARGET_UNIFIED_ASM)
28148 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28150 if (TARGET_BPABI)
28152 const char *fpu_name;
28153 if (arm_selected_arch)
28155 /* armv7ve doesn't support any extensions. */
28156 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28158 /* Keep backward compatibility for assemblers
28159 which don't support armv7ve. */
28160 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28161 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28162 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28163 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28164 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28166 else
28168 const char* pos = strchr (arm_selected_arch->name, '+');
28169 if (pos)
28171 char buf[15];
28172 gcc_assert (strlen (arm_selected_arch->name)
28173 <= sizeof (buf) / sizeof (*pos));
28174 strncpy (buf, arm_selected_arch->name,
28175 (pos - arm_selected_arch->name) * sizeof (*pos));
28176 buf[pos - arm_selected_arch->name] = '\0';
28177 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28178 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28180 else
28181 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28184 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28185 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28186 else
28188 const char* truncated_name
28189 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28190 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28193 if (TARGET_SOFT_FLOAT)
28195 fpu_name = "softvfp";
28197 else
28199 fpu_name = arm_fpu_desc->name;
28200 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28202 if (TARGET_HARD_FLOAT)
28203 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28204 if (TARGET_HARD_FLOAT_ABI)
28205 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28208 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28210 /* Some of these attributes only apply when the corresponding features
28211 are used. However we don't have any easy way of figuring this out.
28212 Conservatively record the setting that would have been used. */
28214 if (flag_rounding_math)
28215 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28217 if (!flag_unsafe_math_optimizations)
28219 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28220 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28222 if (flag_signaling_nans)
28223 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28225 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28226 flag_finite_math_only ? 1 : 3);
28228 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28229 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28230 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28231 flag_short_enums ? 1 : 2);
28233 /* Tag_ABI_optimization_goals. */
28234 if (optimize_size)
28235 val = 4;
28236 else if (optimize >= 2)
28237 val = 2;
28238 else if (optimize)
28239 val = 1;
28240 else
28241 val = 6;
28242 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
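/* The values follow the EABI Tag_ABI_optimization_goals encoding; roughly,
   1 and 2 favour speed (2 aggressively), 4 favours size aggressively, and
   6 favours the debugging experience.  */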
28244 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28245 unaligned_access);
28247 if (arm_fp16_format)
28248 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28249 (int) arm_fp16_format);
28251 if (arm_lang_output_object_attributes_hook)
28252 arm_lang_output_object_attributes_hook();
28255 default_file_start ();
28258 static void
28259 arm_file_end (void)
28261 int regno;
28263 if (NEED_INDICATE_EXEC_STACK)
28264 /* Add .note.GNU-stack. */
28265 file_end_indicate_exec_stack ();
28267 if (! thumb_call_reg_needed)
28268 return;
28270 switch_to_section (text_section);
28271 asm_fprintf (asm_out_file, "\t.code 16\n");
28272 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28274 for (regno = 0; regno < LR_REGNUM; regno++)
28276 rtx label = thumb_call_via_label[regno];
28278 if (label != 0)
28280 targetm.asm_out.internal_label (asm_out_file, "L",
28281 CODE_LABEL_NUMBER (label));
28282 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28287 #ifndef ARM_PE
28288 /* Symbols in the text segment can be accessed without indirecting via the
28289 constant pool; it may take an extra binary operation, but this is still
28290 faster than indirecting via memory. Don't do this when not optimizing,
28291 since we won't be calculating all of the offsets necessary to do this
28292 simplification. */
28294 static void
28295 arm_encode_section_info (tree decl, rtx rtl, int first)
28297 if (optimize > 0 && TREE_CONSTANT (decl))
28298 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28300 default_encode_section_info (decl, rtl, first);
28302 #endif /* !ARM_PE */
28304 static void
28305 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28307 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28308 && !strcmp (prefix, "L"))
28310 arm_ccfsm_state = 0;
28311 arm_target_insn = NULL;
28313 default_internal_label (stream, prefix, labelno);
28316 /* Output code to add DELTA to the first argument, and then jump
28317 to FUNCTION. Used for C++ multiple inheritance. */
28318 static void
28319 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28320 HOST_WIDE_INT delta,
28321 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28322 tree function)
28324 static int thunk_label = 0;
28325 char label[256];
28326 char labelpc[256];
28327 int mi_delta = delta;
28328 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28329 int shift = 0;
28330 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28331 ? 1 : 0);
28332 if (mi_delta < 0)
28333 mi_delta = - mi_delta;
28335 final_start_function (emit_barrier (), file, 1);
28337 if (TARGET_THUMB1)
28339 int labelno = thunk_label++;
28340 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28341 /* Thunks are entered in ARM mode when available. */
28342 if (TARGET_THUMB1_ONLY)
28344 /* push r3 so we can use it as a temporary. */
28345 /* TODO: Omit this save if r3 is not used. */
28346 fputs ("\tpush {r3}\n", file);
28347 fputs ("\tldr\tr3, ", file);
28349 else
28351 fputs ("\tldr\tr12, ", file);
28353 assemble_name (file, label);
28354 fputc ('\n', file);
28355 if (flag_pic)
28357 /* If we are generating PIC, the ldr instruction below loads
28358 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28359 the address of the add + 8, so we have:
28361 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28362 = target + 1.
28364 Note that we have "+ 1" because some versions of GNU ld
28365 don't set the low bit of the result for R_ARM_REL32
28366 relocations against thumb function symbols.
28367 On ARMv6M this is +4, not +8. */
28368 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28369 assemble_name (file, labelpc);
28370 fputs (":\n", file);
28371 if (TARGET_THUMB1_ONLY)
28373 /* This is 2 insns after the start of the thunk, so we know it
28374 is 4-byte aligned. */
28375 fputs ("\tadd\tr3, pc, r3\n", file);
28376 fputs ("\tmov r12, r3\n", file);
28378 else
28379 fputs ("\tadd\tr12, pc, r12\n", file);
28381 else if (TARGET_THUMB1_ONLY)
28382 fputs ("\tmov r12, r3\n", file);
28384 if (TARGET_THUMB1_ONLY)
28386 if (mi_delta > 255)
28388 fputs ("\tldr\tr3, ", file);
28389 assemble_name (file, label);
28390 fputs ("+4\n", file);
28391 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28392 mi_op, this_regno, this_regno);
28394 else if (mi_delta != 0)
28396 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28397 mi_op, this_regno, this_regno,
28398 mi_delta);
28401 else
28403 /* TODO: Use movw/movt for large constants when available. */
28404 while (mi_delta != 0)
28406 if ((mi_delta & (3 << shift)) == 0)
28407 shift += 2;
28408 else
28410 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28411 mi_op, this_regno, this_regno,
28412 mi_delta & (0xff << shift));
28413 mi_delta &= ~(0xff << shift);
28414 shift += 8;
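/* The loop above splits MI_DELTA into 8-bit chunks at even bit positions
   (the form an ARM data-processing immediate can encode) and emits one
   add or sub per chunk.  */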
28418 if (TARGET_THUMB1)
28420 if (TARGET_THUMB1_ONLY)
28421 fputs ("\tpop\t{r3}\n", file);
28423 fprintf (file, "\tbx\tr12\n");
28424 ASM_OUTPUT_ALIGN (file, 2);
28425 assemble_name (file, label);
28426 fputs (":\n", file);
28427 if (flag_pic)
28429 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28430 rtx tem = XEXP (DECL_RTL (function), 0);
28431 tem = plus_constant (GET_MODE (tem), tem, -7);
28432 tem = gen_rtx_MINUS (GET_MODE (tem),
28433 tem,
28434 gen_rtx_SYMBOL_REF (Pmode,
28435 ggc_strdup (labelpc)));
28436 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28438 else
28439 /* Output ".word .LTHUNKn". */
28440 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28442 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28443 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28445 else
28447 fputs ("\tb\t", file);
28448 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28449 if (NEED_PLT_RELOC)
28450 fputs ("(PLT)", file);
28451 fputc ('\n', file);
28454 final_end_function ();
28458 arm_emit_vector_const (FILE *file, rtx x)
28460 int i;
28461 const char * pattern;
28463 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28465 switch (GET_MODE (x))
28467 case V2SImode: pattern = "%08x"; break;
28468 case V4HImode: pattern = "%04x"; break;
28469 case V8QImode: pattern = "%02x"; break;
28470 default: gcc_unreachable ();
28473 fprintf (file, "0x");
28474 for (i = CONST_VECTOR_NUNITS (x); i--;)
28476 rtx element;
28478 element = CONST_VECTOR_ELT (x, i);
28479 fprintf (file, pattern, INTVAL (element));
28482 return 1;
28485 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28486 HFmode constant pool entries are actually loaded with ldr. */
28487 void
28488 arm_emit_fp16_const (rtx c)
28490 REAL_VALUE_TYPE r;
28491 long bits;
28493 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28494 bits = real_to_target (NULL, &r, HFmode);
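/* Two bytes of zero padding keep the entry word-sized; they precede the
   value on big-endian targets and follow it otherwise.  */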
28495 if (WORDS_BIG_ENDIAN)
28496 assemble_zeros (2);
28497 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28498 if (!WORDS_BIG_ENDIAN)
28499 assemble_zeros (2);
28502 const char *
28503 arm_output_load_gr (rtx *operands)
28505 rtx reg;
28506 rtx offset;
28507 rtx wcgr;
28508 rtx sum;
28510 if (!MEM_P (operands [1])
28511 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28512 || !REG_P (reg = XEXP (sum, 0))
28513 || !CONST_INT_P (offset = XEXP (sum, 1))
28514 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28515 return "wldrw%?\t%0, %1";
28517 /* Fix up an out-of-range load of a GR register. */
28518 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28519 wcgr = operands[0];
28520 operands[0] = reg;
28521 output_asm_insn ("ldr%?\t%0, %1", operands);
28523 operands[0] = wcgr;
28524 operands[1] = reg;
28525 output_asm_insn ("tmcr%?\t%0, %1", operands);
28526 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28528 return "";
28531 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28533 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28534 named arg and all anonymous args onto the stack.
28535 XXX I know the prologue shouldn't be pushing registers, but it is faster
28536 that way. */
28538 static void
28539 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28540 enum machine_mode mode,
28541 tree type,
28542 int *pretend_size,
28543 int second_time ATTRIBUTE_UNUSED)
28545 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28546 int nregs;
28548 cfun->machine->uses_anonymous_args = 1;
28549 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28551 nregs = pcum->aapcs_ncrn;
28552 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28553 nregs++;
28555 else
28556 nregs = pcum->nregs;
28558 if (nregs < NUM_ARG_REGS)
28559 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28562 /* We can't rely on the caller doing the proper promotion when
28563 using APCS or ATPCS. */
28565 static bool
28566 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28568 return !TARGET_AAPCS_BASED;
28571 static enum machine_mode
28572 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28573 enum machine_mode mode,
28574 int *punsignedp ATTRIBUTE_UNUSED,
28575 const_tree fntype ATTRIBUTE_UNUSED,
28576 int for_return ATTRIBUTE_UNUSED)
28578 if (GET_MODE_CLASS (mode) == MODE_INT
28579 && GET_MODE_SIZE (mode) < 4)
28580 return SImode;
28582 return mode;
28585 /* AAPCS based ABIs use short enums by default. */
28587 static bool
28588 arm_default_short_enums (void)
28590 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28594 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28596 static bool
28597 arm_align_anon_bitfield (void)
28599 return TARGET_AAPCS_BASED;
28603 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28605 static tree
28606 arm_cxx_guard_type (void)
28608 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28612 /* The EABI says test the least significant bit of a guard variable. */
28614 static bool
28615 arm_cxx_guard_mask_bit (void)
28617 return TARGET_AAPCS_BASED;
28621 /* The EABI specifies that all array cookies are 8 bytes long. */
28623 static tree
28624 arm_get_cookie_size (tree type)
28626 tree size;
28628 if (!TARGET_AAPCS_BASED)
28629 return default_cxx_get_cookie_size (type);
28631 size = build_int_cst (sizetype, 8);
28632 return size;
28636 /* The EABI says that array cookies should also contain the element size. */
28638 static bool
28639 arm_cookie_has_size (void)
28641 return TARGET_AAPCS_BASED;
28645 /* The EABI says constructors and destructors should return a pointer to
28646 the object constructed/destroyed. */
28648 static bool
28649 arm_cxx_cdtor_returns_this (void)
28651 return TARGET_AAPCS_BASED;
28654 /* The EABI says that an inline function may never be the key
28655 method. */
28657 static bool
28658 arm_cxx_key_method_may_be_inline (void)
28660 return !TARGET_AAPCS_BASED;
28663 static void
28664 arm_cxx_determine_class_data_visibility (tree decl)
28666 if (!TARGET_AAPCS_BASED
28667 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28668 return;
28670 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28671 is exported. However, on systems without dynamic vague linkage,
28672 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28673 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28674 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28675 else
28676 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28677 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28680 static bool
28681 arm_cxx_class_data_always_comdat (void)
28683 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28684 vague linkage if the class has no key function. */
28685 return !TARGET_AAPCS_BASED;
28689 /* The EABI says __aeabi_atexit should be used to register static
28690 destructors. */
28692 static bool
28693 arm_cxx_use_aeabi_atexit (void)
28695 return TARGET_AAPCS_BASED;
28699 void
28700 arm_set_return_address (rtx source, rtx scratch)
28702 arm_stack_offsets *offsets;
28703 HOST_WIDE_INT delta;
28704 rtx addr;
28705 unsigned long saved_regs;
28707 offsets = arm_get_frame_offsets ();
28708 saved_regs = offsets->saved_regs_mask;
28710 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28711 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28712 else
28714 if (frame_pointer_needed)
28715 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28716 else
28718 /* LR will be the first saved register. */
28719 delta = offsets->outgoing_args - (offsets->frame + 4);
28722 if (delta >= 4096)
28724 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28725 GEN_INT (delta & ~4095)));
28726 addr = scratch;
28727 delta &= 4095;
28729 else
28730 addr = stack_pointer_rtx;
28732 addr = plus_constant (Pmode, addr, delta);
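/* Deltas of 4096 or more cannot be encoded as a single store offset, so
   the upper bits are added into the scratch register above and only the
   low 12 bits remain in the offset.  */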
28734 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28739 void
28740 thumb_set_return_address (rtx source, rtx scratch)
28742 arm_stack_offsets *offsets;
28743 HOST_WIDE_INT delta;
28744 HOST_WIDE_INT limit;
28745 int reg;
28746 rtx addr;
28747 unsigned long mask;
28749 emit_use (source);
28751 offsets = arm_get_frame_offsets ();
28752 mask = offsets->saved_regs_mask;
28753 if (mask & (1 << LR_REGNUM))
28755 limit = 1024;
28756 /* Find the saved regs. */
28757 if (frame_pointer_needed)
28759 delta = offsets->soft_frame - offsets->saved_args;
28760 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28761 if (TARGET_THUMB1)
28762 limit = 128;
28764 else
28766 delta = offsets->outgoing_args - offsets->saved_args;
28767 reg = SP_REGNUM;
28769 /* Allow for the stack frame. */
28770 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28771 delta -= 16;
28772 /* The link register is always the first saved register. */
28773 delta -= 4;
28775 /* Construct the address. */
28776 addr = gen_rtx_REG (SImode, reg);
28777 if (delta > limit)
28779 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28780 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28781 addr = scratch;
28783 else
28784 addr = plus_constant (Pmode, addr, delta);
28786 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28788 else
28789 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28792 /* Implements target hook vector_mode_supported_p. */
28793 bool
28794 arm_vector_mode_supported_p (enum machine_mode mode)
28796 /* Neon also supports V2SImode, etc. listed in the clause below. */
28797 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28798 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28799 return true;
28801 if ((TARGET_NEON || TARGET_IWMMXT)
28802 && ((mode == V2SImode)
28803 || (mode == V4HImode)
28804 || (mode == V8QImode)))
28805 return true;
28807 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28808 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28809 || mode == V2HAmode))
28810 return true;
28812 return false;
28815 /* Implements target hook array_mode_supported_p. */
28817 static bool
28818 arm_array_mode_supported_p (enum machine_mode mode,
28819 unsigned HOST_WIDE_INT nelems)
28821 if (TARGET_NEON
28822 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28823 && (nelems >= 2 && nelems <= 4))
28824 return true;
28826 return false;
28829 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28830 registers when autovectorizing for Neon, at least until multiple vector
28831 widths are supported properly by the middle-end. */
28833 static enum machine_mode
28834 arm_preferred_simd_mode (enum machine_mode mode)
28836 if (TARGET_NEON)
28837 switch (mode)
28839 case SFmode:
28840 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28841 case SImode:
28842 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28843 case HImode:
28844 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28845 case QImode:
28846 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28847 case DImode:
28848 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28849 return V2DImode;
28850 break;
28852 default:;
28855 if (TARGET_REALLY_IWMMXT)
28856 switch (mode)
28858 case SImode:
28859 return V2SImode;
28860 case HImode:
28861 return V4HImode;
28862 case QImode:
28863 return V8QImode;
28865 default:;
28868 return word_mode;
28871 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28873 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28874 using r0-r4 for function arguments, r7 for the stack frame and not having
28875 enough left over to do doubleword arithmetic. For Thumb-2 all the
28876 potentially problematic instructions accept high registers so this is not
28877 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28878 that require many low registers. */
28879 static bool
28880 arm_class_likely_spilled_p (reg_class_t rclass)
28882 if ((TARGET_THUMB1 && rclass == LO_REGS)
28883 || rclass == CC_REG)
28884 return true;
28886 return false;
28889 /* Implements target hook small_register_classes_for_mode_p. */
28890 bool
28891 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28893 return TARGET_THUMB1;
28896 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28897 ARM insns and therefore guarantee that the shift count is modulo 256.
28898 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28899 guarantee no particular behavior for out-of-range counts. */
28901 static unsigned HOST_WIDE_INT
28902 arm_shift_truncation_mask (enum machine_mode mode)
28904 return mode == SImode ? 255 : 0;
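/* Illustrative consequence: for SImode the mask of 255 tells the middle-end
   that
     x << (n & 255)   and   x << n
   behave identically, so an explicit AND of the shift count can be dropped;
   for DImode the mask of 0 promises nothing about out-of-range counts.  */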
28908 /* Map internal gcc register numbers to DWARF2 register numbers. */
28910 unsigned int
28911 arm_dbx_register_number (unsigned int regno)
28913 if (regno < 16)
28914 return regno;
28916 if (IS_VFP_REGNUM (regno))
28918 /* See comment in arm_dwarf_register_span. */
28919 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28920 return 64 + regno - FIRST_VFP_REGNUM;
28921 else
28922 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28925 if (IS_IWMMXT_GR_REGNUM (regno))
28926 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28928 if (IS_IWMMXT_REGNUM (regno))
28929 return 112 + regno - FIRST_IWMMXT_REGNUM;
28931 gcc_unreachable ();
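/* Worked example (illustrative): a core register such as r5 keeps its own
   number (5); a register in the single-precision-capable VFP bank maps into
   the legacy 64..95 range (S0 -> 64), while the remaining double-precision
   registers map into 256..287 (e.g. D16 -> 272).  */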
28934 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28935 GCC models them as 64 32-bit registers, so we need to describe this to
28936 the DWARF generation code. Other registers can use the default. */
28937 static rtx
28938 arm_dwarf_register_span (rtx rtl)
28940 enum machine_mode mode;
28941 unsigned regno;
28942 rtx parts[16];
28943 int nregs;
28944 int i;
28946 regno = REGNO (rtl);
28947 if (!IS_VFP_REGNUM (regno))
28948 return NULL_RTX;
28950 /* XXX FIXME: The EABI defines two VFP register ranges:
28951 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28952 256-287: D0-D31
28953 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28954 corresponding D register. Until GDB supports this, we shall use the
28955 legacy encodings. We also use these encodings for D0-D15 for
28956 compatibility with older debuggers. */
28957 mode = GET_MODE (rtl);
28958 if (GET_MODE_SIZE (mode) < 8)
28959 return NULL_RTX;
28961 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28963 nregs = GET_MODE_SIZE (mode) / 4;
28964 for (i = 0; i < nregs; i += 2)
28965 if (TARGET_BIG_END)
28967 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28968 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28970 else
28972 parts[i] = gen_rtx_REG (SImode, regno + i);
28973 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28976 else
28978 nregs = GET_MODE_SIZE (mode) / 8;
28979 for (i = 0; i < nregs; i++)
28980 parts[i] = gen_rtx_REG (DImode, regno + i);
28983 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
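/* Illustrative example: a DFmode value held in a single-precision-capable
   D register is described to DWARF as two consecutive SImode pieces (its two
   constituent S registers), with the pair swapped for TARGET_BIG_END; D
   registers outside that bank are described as whole DImode registers.  */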
28986 #if ARM_UNWIND_INFO
28987 /* Emit unwind directives for a store-multiple instruction or stack pointer
28988 push during alignment.
28989 These should only ever be generated by the function prologue code, so
28990 expect them to have a particular form.
28991 The store-multiple instruction sometimes pushes pc as the last register,
28992 although it should not be tracked into unwind information, or for -Os
28993 sometimes pushes some dummy registers before the first register that needs
28994 to be tracked in unwind information; such dummy registers are there just
28995 to avoid separate stack adjustment, and will not be restored in the
28996 epilogue. */
28998 static void
28999 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29001 int i;
29002 HOST_WIDE_INT offset;
29003 HOST_WIDE_INT nregs;
29004 int reg_size;
29005 unsigned reg;
29006 unsigned lastreg;
29007 unsigned padfirst = 0, padlast = 0;
29008 rtx e;
29010 e = XVECEXP (p, 0, 0);
29011 gcc_assert (GET_CODE (e) == SET);
29013 /* First insn will adjust the stack pointer. */
29014 gcc_assert (GET_CODE (e) == SET
29015 && REG_P (SET_DEST (e))
29016 && REGNO (SET_DEST (e)) == SP_REGNUM
29017 && GET_CODE (SET_SRC (e)) == PLUS);
29019 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29020 nregs = XVECLEN (p, 0) - 1;
29021 gcc_assert (nregs);
29023 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29024 if (reg < 16)
29026 /* For -Os dummy registers can be pushed at the beginning to
29027 avoid separate stack pointer adjustment. */
29028 e = XVECEXP (p, 0, 1);
29029 e = XEXP (SET_DEST (e), 0);
29030 if (GET_CODE (e) == PLUS)
29031 padfirst = INTVAL (XEXP (e, 1));
29032 gcc_assert (padfirst == 0 || optimize_size);
29033 /* The function prologue may also push pc, but not annotate it as it is
29034 never restored. We turn this into a stack pointer adjustment. */
29035 e = XVECEXP (p, 0, nregs);
29036 e = XEXP (SET_DEST (e), 0);
29037 if (GET_CODE (e) == PLUS)
29038 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29039 else
29040 padlast = offset - 4;
29041 gcc_assert (padlast == 0 || padlast == 4);
29042 if (padlast == 4)
29043 fprintf (asm_out_file, "\t.pad #4\n");
29044 reg_size = 4;
29045 fprintf (asm_out_file, "\t.save {");
29047 else if (IS_VFP_REGNUM (reg))
29049 reg_size = 8;
29050 fprintf (asm_out_file, "\t.vsave {");
29052 else
29053 /* Unknown register type. */
29054 gcc_unreachable ();
29056 /* If the stack increment doesn't match the size of the saved registers,
29057 something has gone horribly wrong. */
29058 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29060 offset = padfirst;
29061 lastreg = 0;
29062 /* The remaining insns will describe the stores. */
29063 for (i = 1; i <= nregs; i++)
29065 /* Expect (set (mem <addr>) (reg)).
29066 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29067 e = XVECEXP (p, 0, i);
29068 gcc_assert (GET_CODE (e) == SET
29069 && MEM_P (SET_DEST (e))
29070 && REG_P (SET_SRC (e)));
29072 reg = REGNO (SET_SRC (e));
29073 gcc_assert (reg >= lastreg);
29075 if (i != 1)
29076 fprintf (asm_out_file, ", ");
29077 /* We can't use %r for vfp because we need to use the
29078 double precision register names. */
29079 if (IS_VFP_REGNUM (reg))
29080 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29081 else
29082 asm_fprintf (asm_out_file, "%r", reg);
29084 #ifdef ENABLE_CHECKING
29085 /* Check that the addresses are consecutive. */
29086 e = XEXP (SET_DEST (e), 0);
29087 if (GET_CODE (e) == PLUS)
29088 gcc_assert (REG_P (XEXP (e, 0))
29089 && REGNO (XEXP (e, 0)) == SP_REGNUM
29090 && CONST_INT_P (XEXP (e, 1))
29091 && offset == INTVAL (XEXP (e, 1)));
29092 else
29093 gcc_assert (i == 1
29094 && REG_P (e)
29095 && REGNO (e) == SP_REGNUM);
29096 offset += reg_size;
29097 #endif
29099 fprintf (asm_out_file, "}\n");
29100 if (padfirst)
29101 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29104 /* Emit unwind directives for a SET. */
29106 static void
29107 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29109 rtx e0;
29110 rtx e1;
29111 unsigned reg;
29113 e0 = XEXP (p, 0);
29114 e1 = XEXP (p, 1);
29115 switch (GET_CODE (e0))
29117 case MEM:
29118 /* Pushing a single register. */
29119 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29120 || !REG_P (XEXP (XEXP (e0, 0), 0))
29121 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29122 abort ();
29124 asm_fprintf (asm_out_file, "\t.save ");
29125 if (IS_VFP_REGNUM (REGNO (e1)))
29126 asm_fprintf(asm_out_file, "{d%d}\n",
29127 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29128 else
29129 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29130 break;
29132 case REG:
29133 if (REGNO (e0) == SP_REGNUM)
29135 /* A stack increment. */
29136 if (GET_CODE (e1) != PLUS
29137 || !REG_P (XEXP (e1, 0))
29138 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29139 || !CONST_INT_P (XEXP (e1, 1)))
29140 abort ();
29142 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29143 -INTVAL (XEXP (e1, 1)));
29145 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29147 HOST_WIDE_INT offset;
29149 if (GET_CODE (e1) == PLUS)
29151 if (!REG_P (XEXP (e1, 0))
29152 || !CONST_INT_P (XEXP (e1, 1)))
29153 abort ();
29154 reg = REGNO (XEXP (e1, 0));
29155 offset = INTVAL (XEXP (e1, 1));
29156 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29157 HARD_FRAME_POINTER_REGNUM, reg,
29158 offset);
29160 else if (REG_P (e1))
29162 reg = REGNO (e1);
29163 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29164 HARD_FRAME_POINTER_REGNUM, reg);
29166 else
29167 abort ();
29169 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29171 /* Move from sp to reg. */
29172 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29174 else if (GET_CODE (e1) == PLUS
29175 && REG_P (XEXP (e1, 0))
29176 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29177 && CONST_INT_P (XEXP (e1, 1)))
29179 /* Set reg to offset from sp. */
29180 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29181 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29183 else
29184 abort ();
29185 break;
29187 default:
29188 abort ();
29193 /* Emit unwind directives for the given insn. */
29195 static void
29196 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29198 rtx note, pat;
29199 bool handled_one = false;
29201 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29202 return;
29204 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29205 && (TREE_NOTHROW (current_function_decl)
29206 || crtl->all_throwers_are_sibcalls))
29207 return;
29209 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29210 return;
29212 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29214 switch (REG_NOTE_KIND (note))
29216 case REG_FRAME_RELATED_EXPR:
29217 pat = XEXP (note, 0);
29218 goto found;
29220 case REG_CFA_REGISTER:
29221 pat = XEXP (note, 0);
29222 if (pat == NULL)
29224 pat = PATTERN (insn);
29225 if (GET_CODE (pat) == PARALLEL)
29226 pat = XVECEXP (pat, 0, 0);
29229 /* Only emitted for IS_STACKALIGN re-alignment. */
29231 rtx dest, src;
29232 unsigned reg;
29234 src = SET_SRC (pat);
29235 dest = SET_DEST (pat);
29237 gcc_assert (src == stack_pointer_rtx);
29238 reg = REGNO (dest);
29239 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29240 reg + 0x90, reg);
29242 handled_one = true;
29243 break;
29245 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29246 to get correct dwarf information for shrink-wrap. We should not
29247 emit unwind information for it because these are used either for
29248 pretend arguments or notes to adjust sp and restore registers from
29249 the stack. */
29250 case REG_CFA_DEF_CFA:
29251 case REG_CFA_ADJUST_CFA:
29252 case REG_CFA_RESTORE:
29253 return;
29255 case REG_CFA_EXPRESSION:
29256 case REG_CFA_OFFSET:
29257 /* ??? Only handling here what we actually emit. */
29258 gcc_unreachable ();
29260 default:
29261 break;
29264 if (handled_one)
29265 return;
29266 pat = PATTERN (insn);
29267 found:
29269 switch (GET_CODE (pat))
29271 case SET:
29272 arm_unwind_emit_set (asm_out_file, pat);
29273 break;
29275 case SEQUENCE:
29276 /* Store multiple. */
29277 arm_unwind_emit_sequence (asm_out_file, pat);
29278 break;
29280 default:
29281 abort();
29286 /* Output a reference from a function exception table to the type_info
29287 object X. The EABI specifies that the symbol should be relocated by
29288 an R_ARM_TARGET2 relocation. */
29290 static bool
29291 arm_output_ttype (rtx x)
29293 fputs ("\t.word\t", asm_out_file);
29294 output_addr_const (asm_out_file, x);
29295 /* Use special relocations for symbol references. */
29296 if (!CONST_INT_P (x))
29297 fputs ("(TARGET2)", asm_out_file);
29298 fputc ('\n', asm_out_file);
29300 return TRUE;
29303 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29305 static void
29306 arm_asm_emit_except_personality (rtx personality)
29308 fputs ("\t.personality\t", asm_out_file);
29309 output_addr_const (asm_out_file, personality);
29310 fputc ('\n', asm_out_file);
29313 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29315 static void
29316 arm_asm_init_sections (void)
29318 exception_section = get_unnamed_section (0, output_section_asm_op,
29319 "\t.handlerdata");
29321 #endif /* ARM_UNWIND_INFO */
29323 /* Output unwind directives for the start/end of a function. */
29325 void
29326 arm_output_fn_unwind (FILE * f, bool prologue)
29328 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29329 return;
29331 if (prologue)
29332 fputs ("\t.fnstart\n", f);
29333 else
29335 /* If this function will never be unwound, then mark it as such.
29336 The same condition is used in arm_unwind_emit to suppress
29337 the frame annotations. */
29338 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29339 && (TREE_NOTHROW (current_function_decl)
29340 || crtl->all_throwers_are_sibcalls))
29341 fputs("\t.cantunwind\n", f);
29343 fputs ("\t.fnend\n", f);
29347 static bool
29348 arm_emit_tls_decoration (FILE *fp, rtx x)
29350 enum tls_reloc reloc;
29351 rtx val;
29353 val = XVECEXP (x, 0, 0);
29354 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29356 output_addr_const (fp, val);
29358 switch (reloc)
29360 case TLS_GD32:
29361 fputs ("(tlsgd)", fp);
29362 break;
29363 case TLS_LDM32:
29364 fputs ("(tlsldm)", fp);
29365 break;
29366 case TLS_LDO32:
29367 fputs ("(tlsldo)", fp);
29368 break;
29369 case TLS_IE32:
29370 fputs ("(gottpoff)", fp);
29371 break;
29372 case TLS_LE32:
29373 fputs ("(tpoff)", fp);
29374 break;
29375 case TLS_DESCSEQ:
29376 fputs ("(tlsdesc)", fp);
29377 break;
29378 default:
29379 gcc_unreachable ();
29382 switch (reloc)
29384 case TLS_GD32:
29385 case TLS_LDM32:
29386 case TLS_IE32:
29387 case TLS_DESCSEQ:
29388 fputs (" + (. - ", fp);
29389 output_addr_const (fp, XVECEXP (x, 0, 2));
29390 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
29391 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29392 output_addr_const (fp, XVECEXP (x, 0, 3));
29393 fputc (')', fp);
29394 break;
29395 default:
29396 break;
29399 return TRUE;
29402 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29404 static void
29405 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29407 gcc_assert (size == 4);
29408 fputs ("\t.word\t", file);
29409 output_addr_const (file, x);
29410 fputs ("(tlsldo)", file);
29413 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29415 static bool
29416 arm_output_addr_const_extra (FILE *fp, rtx x)
29418 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29419 return arm_emit_tls_decoration (fp, x);
29420 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29422 char label[256];
29423 int labelno = INTVAL (XVECEXP (x, 0, 0));
29425 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29426 assemble_name_raw (fp, label);
29428 return TRUE;
29430 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29432 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29433 if (GOT_PCREL)
29434 fputs ("+.", fp);
29435 fputs ("-(", fp);
29436 output_addr_const (fp, XVECEXP (x, 0, 0));
29437 fputc (')', fp);
29438 return TRUE;
29440 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29442 output_addr_const (fp, XVECEXP (x, 0, 0));
29443 if (GOT_PCREL)
29444 fputs ("+.", fp);
29445 fputs ("-(", fp);
29446 output_addr_const (fp, XVECEXP (x, 0, 1));
29447 fputc (')', fp);
29448 return TRUE;
29450 else if (GET_CODE (x) == CONST_VECTOR)
29451 return arm_emit_vector_const (fp, x);
29453 return FALSE;
29456 /* Output assembly for a shift instruction.
29457 SET_FLAGS determines how the instruction modifies the condition codes.
29458 0 - Do not set condition codes.
29459 1 - Set condition codes.
29460 2 - Use smallest instruction. */
29461 const char *
29462 arm_output_shift(rtx * operands, int set_flags)
29464 char pattern[100];
29465 static const char flag_chars[3] = {'?', '.', '!'};
29466 const char *shift;
29467 HOST_WIDE_INT val;
29468 char c;
29470 c = flag_chars[set_flags];
29471 if (TARGET_UNIFIED_ASM)
29473 shift = shift_op(operands[3], &val);
29474 if (shift)
29476 if (val != -1)
29477 operands[2] = GEN_INT(val);
29478 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29480 else
29481 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29483 else
29484 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29485 output_asm_insn (pattern, operands);
29486 return "";
29489 /* Output assembly for a WMMX immediate shift instruction. */
29490 const char *
29491 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29493 int shift = INTVAL (operands[2]);
29494 char templ[50];
29495 enum machine_mode opmode = GET_MODE (operands[0]);
29497 gcc_assert (shift >= 0);
29499 /* If the shift value in the register versions is > 63 (for the D qualifier),
29500 31 (for W) or 15 (for H), the result needs the special handling below. */
29501 if (((opmode == V4HImode) && (shift > 15))
29502 || ((opmode == V2SImode) && (shift > 31))
29503 || ((opmode == DImode) && (shift > 63)))
29505 if (wror_or_wsra)
29507 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29508 output_asm_insn (templ, operands);
29509 if (opmode == DImode)
29511 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29512 output_asm_insn (templ, operands);
29515 else
29517 /* The destination register will contain all zeros. */
29518 sprintf (templ, "wzero\t%%0");
29519 output_asm_insn (templ, operands);
29521 return "";
29524 if ((opmode == DImode) && (shift > 32))
29526 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29527 output_asm_insn (templ, operands);
29528 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29529 output_asm_insn (templ, operands);
29531 else
29533 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29534 output_asm_insn (templ, operands);
29536 return "";
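/* Illustrative example: a DImode shift by 40 cannot be encoded in a single
   immediate-shift instruction, so it is emitted as a shift by #32 followed by
   a second shift of the intermediate result by #8.  */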
29539 /* Output assembly for a WMMX tinsr instruction. */
29540 const char *
29541 arm_output_iwmmxt_tinsr (rtx *operands)
29543 int mask = INTVAL (operands[3]);
29544 int i;
29545 char templ[50];
29546 int units = mode_nunits[GET_MODE (operands[0])];
29547 gcc_assert ((mask & (mask - 1)) == 0);
29548 for (i = 0; i < units; ++i)
29550 if ((mask & 0x01) == 1)
29552 break;
29554 mask >>= 1;
29556 gcc_assert (i < units);
29558 switch (GET_MODE (operands[0]))
29560 case V8QImode:
29561 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29562 break;
29563 case V4HImode:
29564 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29565 break;
29566 case V2SImode:
29567 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29568 break;
29569 default:
29570 gcc_unreachable ();
29571 break;
29573 output_asm_insn (templ, operands);
29575 return "";
29578 /* Output a Thumb-1 casesi dispatch sequence. */
29579 const char *
29580 thumb1_output_casesi (rtx *operands)
29582 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29584 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29586 switch (GET_MODE(diff_vec))
29588 case QImode:
29589 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29590 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29591 case HImode:
29592 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29593 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29594 case SImode:
29595 return "bl\t%___gnu_thumb1_case_si";
29596 default:
29597 gcc_unreachable ();
29601 /* Output a Thumb-2 casesi instruction. */
29602 const char *
29603 thumb2_output_casesi (rtx *operands)
29605 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29607 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29609 output_asm_insn ("cmp\t%0, %1", operands);
29610 output_asm_insn ("bhi\t%l3", operands);
29611 switch (GET_MODE(diff_vec))
29613 case QImode:
29614 return "tbb\t[%|pc, %0]";
29615 case HImode:
29616 return "tbh\t[%|pc, %0, lsl #1]";
29617 case SImode:
29618 if (flag_pic)
29620 output_asm_insn ("adr\t%4, %l2", operands);
29621 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29622 output_asm_insn ("add\t%4, %4, %5", operands);
29623 return "bx\t%4";
29625 else
29627 output_asm_insn ("adr\t%4, %l2", operands);
29628 return "ldr\t%|pc, [%4, %0, lsl #2]";
29630 default:
29631 gcc_unreachable ();
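/* Illustrative example: for an HImode dispatch table this emits, roughly,

     cmp  index, bound
     bhi  default_label
     tbh  [pc, index, lsl #1]

   where index, bound and default_label stand for operands 0, 1 and 3.  */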
29635 /* Most ARM cores are single issue, but some newer ones can issue two or more instructions per cycle.
29636 The scheduler descriptions rely on this being correct. */
29637 static int
29638 arm_issue_rate (void)
29640 switch (arm_tune)
29642 case cortexa15:
29643 case cortexa57:
29644 return 3;
29646 case cortexr4:
29647 case cortexr4f:
29648 case cortexr5:
29649 case genericv7a:
29650 case cortexa5:
29651 case cortexa7:
29652 case cortexa8:
29653 case cortexa9:
29654 case cortexa12:
29655 case cortexa53:
29656 case fa726te:
29657 case marvell_pj4:
29658 return 2;
29660 default:
29661 return 1;
29665 /* A table and a function to perform ARM-specific name mangling for
29666 NEON vector types in order to conform to the AAPCS (see "Procedure
29667 Call Standard for the ARM Architecture", Appendix A). To qualify
29668 for emission with the mangled names defined in that document, a
29669 vector type must not only be of the correct mode but also be
29670 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29671 typedef struct
29673 enum machine_mode mode;
29674 const char *element_type_name;
29675 const char *aapcs_name;
29676 } arm_mangle_map_entry;
29678 static arm_mangle_map_entry arm_mangle_map[] = {
29679 /* 64-bit containerized types. */
29680 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29681 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29682 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29683 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29684 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29685 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29686 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29687 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29688 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29689 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29691 /* 128-bit containerized types. */
29692 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29693 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29694 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29695 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29696 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29697 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29698 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29699 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29700 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29701 { VOIDmode, NULL, NULL }
29704 const char *
29705 arm_mangle_type (const_tree type)
29707 arm_mangle_map_entry *pos = arm_mangle_map;
29709 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29710 has to be mangled as if it were in the "std" namespace. */
29711 if (TARGET_AAPCS_BASED
29712 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29713 return "St9__va_list";
29715 /* Half-precision float. */
29716 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29717 return "Dh";
29719 if (TREE_CODE (type) != VECTOR_TYPE)
29720 return NULL;
29722 /* Check the mode of the vector type, and the name of the vector
29723 element type, against the table. */
29724 while (pos->mode != VOIDmode)
29726 tree elt_type = TREE_TYPE (type);
29728 if (pos->mode == TYPE_MODE (type)
29729 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29730 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29731 pos->element_type_name))
29732 return pos->aapcs_name;
29734 pos++;
29737 /* Use the default mangling for unrecognized (possibly user-defined)
29738 vector types. */
29739 return NULL;
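/* Worked example (illustrative, assuming the usual Itanium mangling for the
   rest of the name): a 64-bit vector of __builtin_neon_qi elements (V8QImode,
   the int8x8_t container) maps to "15__simd64_int8_t" above, so
   "void foo (int8x8_t)" would mangle as _Z3foo15__simd64_int8_t.  */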
29742 /* Order of allocation of core registers for Thumb: this allocation is
29743 written over the corresponding initial entries of the array
29744 initialized with REG_ALLOC_ORDER. We allocate all low registers
29745 first. Saving and restoring a low register is usually cheaper than
29746 using a call-clobbered high register. */
29748 static const int thumb_core_reg_alloc_order[] =
29750 3, 2, 1, 0, 4, 5, 6, 7,
29751 14, 12, 8, 9, 10, 11
29754 /* Adjust register allocation order when compiling for Thumb. */
29756 void
29757 arm_order_regs_for_local_alloc (void)
29759 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29760 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29761 if (TARGET_THUMB)
29762 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29763 sizeof (thumb_core_reg_alloc_order));
29766 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29768 bool
29769 arm_frame_pointer_required (void)
29771 return (cfun->has_nonlocal_label
29772 || SUBTARGET_FRAME_POINTER_REQUIRED
29773 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29776 /* Only Thumb-1 lacks support for conditional execution, so return true if
29777 the target is not Thumb-1. */
29778 static bool
29779 arm_have_conditional_execution (void)
29781 return !TARGET_THUMB1;
29784 tree
29785 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29787 enum machine_mode in_mode, out_mode;
29788 int in_n, out_n;
29790 if (TREE_CODE (type_out) != VECTOR_TYPE
29791 || TREE_CODE (type_in) != VECTOR_TYPE
29792 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29793 return NULL_TREE;
29795 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29796 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29797 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29798 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29800 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29801 decl of the vectorized builtin for the appropriate vector mode.
29802 NULL_TREE is returned if no such builtin is available. */
29803 #undef ARM_CHECK_BUILTIN_MODE
29804 #define ARM_CHECK_BUILTIN_MODE(C) \
29805 (out_mode == SFmode && out_n == C \
29806 && in_mode == SFmode && in_n == C)
29808 #undef ARM_FIND_VRINT_VARIANT
29809 #define ARM_FIND_VRINT_VARIANT(N) \
29810 (ARM_CHECK_BUILTIN_MODE (2) \
29811 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29812 : (ARM_CHECK_BUILTIN_MODE (4) \
29813 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29814 : NULL_TREE))
29816 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29818 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29819 switch (fn)
29821 case BUILT_IN_FLOORF:
29822 return ARM_FIND_VRINT_VARIANT (vrintm);
29823 case BUILT_IN_CEILF:
29824 return ARM_FIND_VRINT_VARIANT (vrintp);
29825 case BUILT_IN_TRUNCF:
29826 return ARM_FIND_VRINT_VARIANT (vrintz);
29827 case BUILT_IN_ROUNDF:
29828 return ARM_FIND_VRINT_VARIANT (vrinta);
29829 default:
29830 return NULL_TREE;
29833 return NULL_TREE;
29835 #undef ARM_CHECK_BUILTIN_MODE
29836 #undef ARM_FIND_VRINT_VARIANT
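/* Illustrative example: with -funsafe-math-optimizations on an ARMv8 FP
   target, a floorf call vectorized over four floats is mapped to the
   ARM_BUILTIN_NEON_vrintmv4sf builtin above (roughly, a vrintm.f32
   instruction on a quad register).  */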
29838 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29839 static HOST_WIDE_INT
29840 arm_vector_alignment (const_tree type)
29842 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29844 if (TARGET_AAPCS_BASED)
29845 align = MIN (align, 64);
29847 return align;
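/* Illustrative example: a 128-bit NEON vector type (TYPE_SIZE of 128 bits) is
   therefore given only 64-bit alignment when TARGET_AAPCS_BASED; without the
   AAPCS cap it keeps its full 128-bit natural alignment.  */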
29850 static unsigned int
29851 arm_autovectorize_vector_sizes (void)
29853 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29856 static bool
29857 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29859 /* Vectors which aren't in packed structures will not be less aligned than
29860 the natural alignment of their element type, so this is safe. */
29861 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29862 return !is_packed;
29864 return default_builtin_vector_alignment_reachable (type, is_packed);
29867 static bool
29868 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29869 const_tree type, int misalignment,
29870 bool is_packed)
29872 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29874 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29876 if (is_packed)
29877 return align == 1;
29879 /* If the misalignment is unknown, we should be able to handle the access
29880 so long as it is not to a member of a packed data structure. */
29881 if (misalignment == -1)
29882 return true;
29884 /* Return true if the misalignment is a multiple of the natural alignment
29885 of the vector's element type. This is probably always going to be
29886 true in practice, since we've already established that this isn't a
29887 packed access. */
29888 return ((misalignment % align) == 0);
29891 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29892 is_packed);
29895 static void
29896 arm_conditional_register_usage (void)
29898 int regno;
29900 if (TARGET_THUMB1 && optimize_size)
29902 /* When optimizing for size on Thumb-1, it's better not
29903 to use the HI regs, because of the overhead of
29904 stacking them. */
29905 for (regno = FIRST_HI_REGNUM;
29906 regno <= LAST_HI_REGNUM; ++regno)
29907 fixed_regs[regno] = call_used_regs[regno] = 1;
29910 /* The link register can be clobbered by any branch insn,
29911 but we have no way to track that at present, so mark
29912 it as unavailable. */
29913 if (TARGET_THUMB1)
29914 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29916 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29918 /* VFPv3 registers are disabled when earlier VFP
29919 versions are selected due to the definition of
29920 LAST_VFP_REGNUM. */
29921 for (regno = FIRST_VFP_REGNUM;
29922 regno <= LAST_VFP_REGNUM; ++ regno)
29924 fixed_regs[regno] = 0;
29925 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29926 || regno >= FIRST_VFP_REGNUM + 32;
29930 if (TARGET_REALLY_IWMMXT)
29932 regno = FIRST_IWMMXT_GR_REGNUM;
29933 /* The 2002/10/09 revision of the XScale ABI has wCG0
29934 and wCG1 as call-preserved registers. The 2002/11/21
29935 revision changed this so that all wCG registers are
29936 scratch registers. */
29937 for (regno = FIRST_IWMMXT_GR_REGNUM;
29938 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29939 fixed_regs[regno] = 0;
29940 /* The XScale ABI has wR0 - wR9 as scratch registers,
29941 the rest as call-preserved registers. */
29942 for (regno = FIRST_IWMMXT_REGNUM;
29943 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29945 fixed_regs[regno] = 0;
29946 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29950 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29952 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29953 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29955 else if (TARGET_APCS_STACK)
29957 fixed_regs[10] = 1;
29958 call_used_regs[10] = 1;
29960 /* -mcaller-super-interworking reserves r11 for calls to
29961 _interwork_r11_call_via_rN(). Making the register global
29962 is an easy way of ensuring that it remains valid for all
29963 calls. */
29964 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29965 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29967 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29968 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29969 if (TARGET_CALLER_INTERWORKING)
29970 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29972 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29975 static reg_class_t
29976 arm_preferred_rename_class (reg_class_t rclass)
29978 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29979 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
29980 so that code size can be reduced. */
29981 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29982 return LO_REGS;
29983 else
29984 return NO_REGS;
29987 /* Compute the attribute "length" of insn "*push_multi".
29988 So this function MUST be kept in sync with that insn pattern. */
29990 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29992 int i, regno, hi_reg;
29993 int num_saves = XVECLEN (parallel_op, 0);
29995 /* ARM mode. */
29996 if (TARGET_ARM)
29997 return 4;
29998 /* Thumb1 mode. */
29999 if (TARGET_THUMB1)
30000 return 2;
30002 /* Thumb2 mode. */
30003 regno = REGNO (first_op);
30004 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30005 for (i = 1; i < num_saves && !hi_reg; i++)
30007 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30008 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30011 if (!hi_reg)
30012 return 2;
30013 return 4;
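/* Illustrative example: in Thumb-2, "push {r4, r5, lr}" involves only low
   registers plus LR and so gets the 2-byte encoding, whereas "push {r4, r8}"
   names a high register other than LR and needs the 4-byte encoding.  */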
30016 /* Compute the number of instructions emitted by output_move_double. */
30018 arm_count_output_move_double_insns (rtx *operands)
30020 int count;
30021 rtx ops[2];
30022 /* output_move_double may modify the operands array, so call it
30023 here on a copy of the array. */
30024 ops[0] = operands[0];
30025 ops[1] = operands[1];
30026 output_move_double (ops, false, &count);
30027 return count;
30031 vfp3_const_double_for_fract_bits (rtx operand)
30033 REAL_VALUE_TYPE r0;
30035 if (!CONST_DOUBLE_P (operand))
30036 return 0;
30038 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30039 if (exact_real_inverse (DFmode, &r0))
30041 if (exact_real_truncate (DFmode, &r0))
30043 HOST_WIDE_INT value = real_to_integer (&r0);
30044 value = value & 0xffffffff;
30045 if ((value != 0) && ( (value & (value - 1)) == 0))
30046 return int_log2 (value);
30049 return 0;
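/* Worked example (illustrative): the constant 0.25 has the exact inverse 4.0,
   which truncates exactly to the power of two 4, so the function returns 2
   (the number of fractional bits); a value such as 0.3 fails the exact
   inverse test and yields 0.  */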
30053 vfp3_const_double_for_bits (rtx operand)
30055 REAL_VALUE_TYPE r0;
30057 if (!CONST_DOUBLE_P (operand))
30058 return 0;
30060 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30061 if (exact_real_truncate (DFmode, &r0))
30063 HOST_WIDE_INT value = real_to_integer (&r0);
30064 value = value & 0xffffffff;
30065 if ((value != 0) && ( (value & (value - 1)) == 0))
30066 return int_log2 (value);
30069 return 0;
30072 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30074 static void
30075 arm_pre_atomic_barrier (enum memmodel model)
30077 if (need_atomic_barrier_p (model, true))
30078 emit_insn (gen_memory_barrier ());
30081 static void
30082 arm_post_atomic_barrier (enum memmodel model)
30084 if (need_atomic_barrier_p (model, false))
30085 emit_insn (gen_memory_barrier ());
30088 /* Emit the load-exclusive and store-exclusive instructions.
30089 Use acquire and release versions if necessary. */
30091 static void
30092 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30094 rtx (*gen) (rtx, rtx);
30096 if (acq)
30098 switch (mode)
30100 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30101 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30102 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30103 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30104 default:
30105 gcc_unreachable ();
30108 else
30110 switch (mode)
30112 case QImode: gen = gen_arm_load_exclusiveqi; break;
30113 case HImode: gen = gen_arm_load_exclusivehi; break;
30114 case SImode: gen = gen_arm_load_exclusivesi; break;
30115 case DImode: gen = gen_arm_load_exclusivedi; break;
30116 default:
30117 gcc_unreachable ();
30121 emit_insn (gen (rval, mem));
30124 static void
30125 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30126 rtx mem, bool rel)
30128 rtx (*gen) (rtx, rtx, rtx);
30130 if (rel)
30132 switch (mode)
30134 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30135 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30136 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30137 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30138 default:
30139 gcc_unreachable ();
30142 else
30144 switch (mode)
30146 case QImode: gen = gen_arm_store_exclusiveqi; break;
30147 case HImode: gen = gen_arm_store_exclusivehi; break;
30148 case SImode: gen = gen_arm_store_exclusivesi; break;
30149 case DImode: gen = gen_arm_store_exclusivedi; break;
30150 default:
30151 gcc_unreachable ();
30155 emit_insn (gen (bval, rval, mem));
30158 /* Mark the previous jump instruction as unlikely. */
30160 static void
30161 emit_unlikely_jump (rtx insn)
30163 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30165 insn = emit_jump_insn (insn);
30166 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30169 /* Expand a compare and swap pattern. */
30171 void
30172 arm_expand_compare_and_swap (rtx operands[])
30174 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30175 enum machine_mode mode;
30176 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30178 bval = operands[0];
30179 rval = operands[1];
30180 mem = operands[2];
30181 oldval = operands[3];
30182 newval = operands[4];
30183 is_weak = operands[5];
30184 mod_s = operands[6];
30185 mod_f = operands[7];
30186 mode = GET_MODE (mem);
30188 /* Normally the succ memory model must be stronger than fail, but in the
30189 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30190 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30192 if (TARGET_HAVE_LDACQ
30193 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30194 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30195 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30197 switch (mode)
30199 case QImode:
30200 case HImode:
30201 /* For narrow modes, we're going to perform the comparison in SImode,
30202 so do the zero-extension now. */
30203 rval = gen_reg_rtx (SImode);
30204 oldval = convert_modes (SImode, mode, oldval, true);
30205 /* FALLTHRU */
30207 case SImode:
30208 /* Force the value into a register if needed. We waited until after
30209 the zero-extension above to do this properly. */
30210 if (!arm_add_operand (oldval, SImode))
30211 oldval = force_reg (SImode, oldval);
30212 break;
30214 case DImode:
30215 if (!cmpdi_operand (oldval, mode))
30216 oldval = force_reg (mode, oldval);
30217 break;
30219 default:
30220 gcc_unreachable ();
30223 switch (mode)
30225 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30226 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30227 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30228 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30229 default:
30230 gcc_unreachable ();
30233 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30235 if (mode == QImode || mode == HImode)
30236 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30238 /* In all cases, we arrange for success to be signaled by Z set.
30239 This arrangement allows for the boolean result to be used directly
30240 in a subsequent branch, post optimization. */
30241 x = gen_rtx_REG (CCmode, CC_REGNUM);
30242 x = gen_rtx_EQ (SImode, x, const0_rtx);
30243 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30246 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30247 another memory store between the load-exclusive and store-exclusive can
30248 reset the monitor from Exclusive to Open state. This means we must wait
30249 until after reload to split the pattern, lest we get a register spill in
30250 the middle of the atomic sequence. */
30252 void
30253 arm_split_compare_and_swap (rtx operands[])
30255 rtx rval, mem, oldval, newval, scratch;
30256 enum machine_mode mode;
30257 enum memmodel mod_s, mod_f;
30258 bool is_weak;
30259 rtx label1, label2, x, cond;
30261 rval = operands[0];
30262 mem = operands[1];
30263 oldval = operands[2];
30264 newval = operands[3];
30265 is_weak = (operands[4] != const0_rtx);
30266 mod_s = (enum memmodel) INTVAL (operands[5]);
30267 mod_f = (enum memmodel) INTVAL (operands[6]);
30268 scratch = operands[7];
30269 mode = GET_MODE (mem);
30271 bool use_acquire = TARGET_HAVE_LDACQ
30272 && !(mod_s == MEMMODEL_RELAXED
30273 || mod_s == MEMMODEL_CONSUME
30274 || mod_s == MEMMODEL_RELEASE);
30276 bool use_release = TARGET_HAVE_LDACQ
30277 && !(mod_s == MEMMODEL_RELAXED
30278 || mod_s == MEMMODEL_CONSUME
30279 || mod_s == MEMMODEL_ACQUIRE);
30281 /* Checks whether a barrier is needed and emits one accordingly. */
30282 if (!(use_acquire || use_release))
30283 arm_pre_atomic_barrier (mod_s);
30285 label1 = NULL_RTX;
30286 if (!is_weak)
30288 label1 = gen_label_rtx ();
30289 emit_label (label1);
30291 label2 = gen_label_rtx ();
30293 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30295 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30296 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30297 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30298 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30299 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30301 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30303 /* Weak or strong, we want EQ to be true for success, so that we
30304 match the flags that we got from the compare above. */
30305 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30306 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30307 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30309 if (!is_weak)
30311 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30312 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30313 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30314 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30317 if (mod_f != MEMMODEL_RELAXED)
30318 emit_label (label2);
30320 /* Checks whether a barrier is needed and emits one accordingly. */
30321 if (!(use_acquire || use_release))
30322 arm_post_atomic_barrier (mod_s);
30324 if (mod_f == MEMMODEL_RELAXED)
30325 emit_label (label2);
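/* Rough shape of the split sequence for a strong SImode compare-and-swap
   (illustrative only; barriers and acquire/release variants depend on the
   memory model):

     1:  ldrex   rval, [mem]
         cmp     rval, oldval
         bne     2f
         strex   scratch, newval, [mem]
         cmp     scratch, #0
         bne     1b
     2:                              @ Z is set exactly on success
*/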
30328 void
30329 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30330 rtx value, rtx model_rtx, rtx cond)
30332 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30333 enum machine_mode mode = GET_MODE (mem);
30334 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30335 rtx label, x;
30337 bool use_acquire = TARGET_HAVE_LDACQ
30338 && !(model == MEMMODEL_RELAXED
30339 || model == MEMMODEL_CONSUME
30340 || model == MEMMODEL_RELEASE);
30342 bool use_release = TARGET_HAVE_LDACQ
30343 && !(model == MEMMODEL_RELAXED
30344 || model == MEMMODEL_CONSUME
30345 || model == MEMMODEL_ACQUIRE);
30347 /* Checks whether a barrier is needed and emits one accordingly. */
30348 if (!(use_acquire || use_release))
30349 arm_pre_atomic_barrier (model);
30351 label = gen_label_rtx ();
30352 emit_label (label);
30354 if (new_out)
30355 new_out = gen_lowpart (wmode, new_out);
30356 if (old_out)
30357 old_out = gen_lowpart (wmode, old_out);
30358 else
30359 old_out = new_out;
30360 value = simplify_gen_subreg (wmode, value, mode, 0);
30362 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30364 switch (code)
30366 case SET:
30367 new_out = value;
30368 break;
30370 case NOT:
30371 x = gen_rtx_AND (wmode, old_out, value);
30372 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30373 x = gen_rtx_NOT (wmode, new_out);
30374 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30375 break;
30377 case MINUS:
30378 if (CONST_INT_P (value))
30380 value = GEN_INT (-INTVAL (value));
30381 code = PLUS;
30383 /* FALLTHRU */
30385 case PLUS:
30386 if (mode == DImode)
30388 /* DImode plus/minus need to clobber flags. */
30389 /* The adddi3 and subdi3 patterns are incorrectly written so that
30390 they require matching operands, even when we could easily support
30391 three operands. Thankfully, this can be fixed up post-splitting,
30392 as the individual add+adc patterns do accept three operands and
30393 post-reload cprop can make these moves go away. */
30394 emit_move_insn (new_out, old_out);
30395 if (code == PLUS)
30396 x = gen_adddi3 (new_out, new_out, value);
30397 else
30398 x = gen_subdi3 (new_out, new_out, value);
30399 emit_insn (x);
30400 break;
30402 /* FALLTHRU */
30404 default:
30405 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30406 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30407 break;
30410 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30411 use_release);
30413 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30414 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30416 /* Checks whether a barrier is needed and emits one accordingly. */
30417 if (!(use_acquire || use_release))
30418 arm_post_atomic_barrier (model);
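/* Rough shape of the loop emitted for, e.g., an atomic SImode add
   (illustrative only; barriers and acquire/release variants depend on the
   memory model):

     1:  ldrex   old, [mem]
         add     new, old, value
         strex   tmp, new, [mem]
         cmp     tmp, #0
         bne     1b
*/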
30421 #define MAX_VECT_LEN 16
30423 struct expand_vec_perm_d
30425 rtx target, op0, op1;
30426 unsigned char perm[MAX_VECT_LEN];
30427 enum machine_mode vmode;
30428 unsigned char nelt;
30429 bool one_vector_p;
30430 bool testing_p;
30433 /* Generate a variable permutation. */
30435 static void
30436 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30438 enum machine_mode vmode = GET_MODE (target);
30439 bool one_vector_p = rtx_equal_p (op0, op1);
30441 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30442 gcc_checking_assert (GET_MODE (op0) == vmode);
30443 gcc_checking_assert (GET_MODE (op1) == vmode);
30444 gcc_checking_assert (GET_MODE (sel) == vmode);
30445 gcc_checking_assert (TARGET_NEON);
30447 if (one_vector_p)
30449 if (vmode == V8QImode)
30450 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30451 else
30452 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30454 else
30456 rtx pair;
30458 if (vmode == V8QImode)
30460 pair = gen_reg_rtx (V16QImode);
30461 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30462 pair = gen_lowpart (TImode, pair);
30463 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30465 else
30467 pair = gen_reg_rtx (OImode);
30468 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30469 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30474 void
30475 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30477 enum machine_mode vmode = GET_MODE (target);
30478 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30479 bool one_vector_p = rtx_equal_p (op0, op1);
30480 rtx rmask[MAX_VECT_LEN], mask;
30482 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30483 numbering of elements for big-endian, we must reverse the order. */
30484 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30486 /* The VTBL instruction does not use a modulo index, so we must take care
30487 of that ourselves. */
30488 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30489 for (i = 0; i < nelt; ++i)
30490 rmask[i] = mask;
30491 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30492 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30494 arm_expand_vec_perm_1 (target, op0, op1, sel);
30497 /* Generate or test for an insn that supports a constant permutation. */
30499 /* Recognize patterns for the VUZP insns. */
30501 static bool
30502 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30504 unsigned int i, odd, mask, nelt = d->nelt;
30505 rtx out0, out1, in0, in1, x;
30506 rtx (*gen)(rtx, rtx, rtx, rtx);
30508 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30509 return false;
30511 /* Note that these are little-endian tests. Adjust for big-endian later. */
30512 if (d->perm[0] == 0)
30513 odd = 0;
30514 else if (d->perm[0] == 1)
30515 odd = 1;
30516 else
30517 return false;
30518 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30520 for (i = 0; i < nelt; i++)
30522 unsigned elt = (i * 2 + odd) & mask;
30523 if (d->perm[i] != elt)
30524 return false;
30527 /* Success! */
30528 if (d->testing_p)
30529 return true;
30531 switch (d->vmode)
30533 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30534 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30535 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30536 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30537 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30538 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30539 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30540 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30541 default:
30542 gcc_unreachable ();
30545 in0 = d->op0;
30546 in1 = d->op1;
30547 if (BYTES_BIG_ENDIAN)
30549 x = in0, in0 = in1, in1 = x;
30550 odd = !odd;
30553 out0 = d->target;
30554 out1 = gen_reg_rtx (d->vmode);
30555 if (odd)
30556 x = out0, out0 = out1, out1 = x;
30558 emit_insn (gen (out0, in0, in1, out1));
30559 return true;
30562 /* Recognize patterns for the VZIP insns. */
30564 static bool
30565 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30567 unsigned int i, high, mask, nelt = d->nelt;
30568 rtx out0, out1, in0, in1, x;
30569 rtx (*gen)(rtx, rtx, rtx, rtx);
30571 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30572 return false;
30574 /* Note that these are little-endian tests. Adjust for big-endian later. */
30575 high = nelt / 2;
30576 if (d->perm[0] == high)
30578 else if (d->perm[0] == 0)
30579 high = 0;
30580 else
30581 return false;
30582 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30584 for (i = 0; i < nelt / 2; i++)
30586 unsigned elt = (i + high) & mask;
30587 if (d->perm[i * 2] != elt)
30588 return false;
30589 elt = (elt + nelt) & mask;
30590 if (d->perm[i * 2 + 1] != elt)
30591 return false;
30594 /* Success! */
30595 if (d->testing_p)
30596 return true;
30598 switch (d->vmode)
30600 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30601 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30602 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30603 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30604 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30605 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30606 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30607 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30608 default:
30609 gcc_unreachable ();
30612 in0 = d->op0;
30613 in1 = d->op1;
30614 if (BYTES_BIG_ENDIAN)
30616 x = in0, in0 = in1, in1 = x;
30617 high = !high;
30620 out0 = d->target;
30621 out1 = gen_reg_rtx (d->vmode);
30622 if (high)
30623 x = out0, out0 = out1, out1 = x;
30625 emit_insn (gen (out0, in0, in1, out1));
30626 return true;
30629 /* Recognize patterns for the VREV insns. */
30631 static bool
30632 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30634 unsigned int i, j, diff, nelt = d->nelt;
30635 rtx (*gen)(rtx, rtx, rtx);
30637 if (!d->one_vector_p)
30638 return false;
30640 diff = d->perm[0];
30641 switch (diff)
30643 case 7:
30644 switch (d->vmode)
30646 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30647 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30648 default:
30649 return false;
30651 break;
30652 case 3:
30653 switch (d->vmode)
30655 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30656 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30657 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30658 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30659 default:
30660 return false;
30662 break;
30663 case 1:
30664 switch (d->vmode)
30666 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30667 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30668 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30669 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30670 case V4SImode: gen = gen_neon_vrev64v4si; break;
30671 case V2SImode: gen = gen_neon_vrev64v2si; break;
30672 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30673 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30674 default:
30675 return false;
30677 break;
30678 default:
30679 return false;
30682 for (i = 0; i < nelt ; i += diff + 1)
30683 for (j = 0; j <= diff; j += 1)
30685 /* This is guaranteed to be true as the value of diff
30686 is 7, 3 or 1 and we should have enough elements in the
30687 queue to generate this. Getting a vector mask with a
30688 value of diff other than these values implies that
30689 something is wrong by the time we get here. */
30690 gcc_assert (i + j < nelt);
30691 if (d->perm[i + j] != i + diff - j)
30692 return false;
30695 /* Success! */
30696 if (d->testing_p)
30697 return true;
30699 /* ??? The third operand is an artifact of the builtin infrastructure
30700 and is ignored by the actual instruction. */
30701 emit_insn (gen (d->target, d->op0, const0_rtx));
30702 return true;
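/* Illustrative example: the single-operand V8QImode permutation
   {7,6,5,4,3,2,1,0} gives diff == 7, satisfies the element checks above, and
   is therefore emitted as a single vrev64.8 on the D register.  */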
30705 /* Recognize patterns for the VTRN insns. */
30707 static bool
30708 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30710 unsigned int i, odd, mask, nelt = d->nelt;
30711 rtx out0, out1, in0, in1, x;
30712 rtx (*gen)(rtx, rtx, rtx, rtx);
30714 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30715 return false;
30717 /* Note that these are little-endian tests. Adjust for big-endian later. */
30718 if (d->perm[0] == 0)
30719 odd = 0;
30720 else if (d->perm[0] == 1)
30721 odd = 1;
30722 else
30723 return false;
30724 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30726 for (i = 0; i < nelt; i += 2)
30728 if (d->perm[i] != i + odd)
30729 return false;
30730 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30731 return false;
30734 /* Success! */
30735 if (d->testing_p)
30736 return true;
30738 switch (d->vmode)
30740 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30741 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30742 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30743 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30744 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30745 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30746 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30747 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30748 default:
30749 gcc_unreachable ();
30752 in0 = d->op0;
30753 in1 = d->op1;
30754 if (BYTES_BIG_ENDIAN)
30756 x = in0, in0 = in1, in1 = x;
30757 odd = !odd;
30760 out0 = d->target;
30761 out1 = gen_reg_rtx (d->vmode);
30762 if (odd)
30763 x = out0, out0 = out1, out1 = x;
30765 emit_insn (gen (out0, in0, in1, out1));
30766 return true;
30769 /* Recognize patterns for the VEXT insns. */
30771 static bool
30772 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30774 unsigned int i, nelt = d->nelt;
30775 rtx (*gen) (rtx, rtx, rtx, rtx);
30776 rtx offset;
30778 unsigned int location;
30780 unsigned int next = d->perm[0] + 1;
30782 /* TODO: Handle GCC's numbering of elements for big-endian. */
30783 if (BYTES_BIG_ENDIAN)
30784 return false;
30786 /* Check if the extracted indexes are increasing by one. */
30787 for (i = 1; i < nelt; next++, i++)
30789 /* If we hit the most significant element of the 2nd vector in
30790 the previous iteration, no need to test further. */
30791 if (next == 2 * nelt)
30792 return false;
30794 /* If we are operating on only one vector: it could be a
30795 rotation. If there are only two elements of size < 64, let
30796 arm_evpc_neon_vrev catch it. */
30797 if (d->one_vector_p && (next == nelt))
30799 if ((nelt == 2) && (d->vmode != V2DImode))
30800 return false;
30801 else
30802 next = 0;
30805 if (d->perm[i] != next)
30806 return false;
30809 location = d->perm[0];
30811 switch (d->vmode)
30813 case V16QImode: gen = gen_neon_vextv16qi; break;
30814 case V8QImode: gen = gen_neon_vextv8qi; break;
30815 case V4HImode: gen = gen_neon_vextv4hi; break;
30816 case V8HImode: gen = gen_neon_vextv8hi; break;
30817 case V2SImode: gen = gen_neon_vextv2si; break;
30818 case V4SImode: gen = gen_neon_vextv4si; break;
30819 case V2SFmode: gen = gen_neon_vextv2sf; break;
30820 case V4SFmode: gen = gen_neon_vextv4sf; break;
30821 case V2DImode: gen = gen_neon_vextv2di; break;
30822 default:
30823 return false;
30826 /* Success! */
30827 if (d->testing_p)
30828 return true;
30830 offset = GEN_INT (location);
30831 emit_insn (gen (d->target, d->op0, d->op1, offset));
30832 return true;
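/* Illustrative sketch (not part of GCC): the loop above accepts
   selectors whose indices increase by one, i.e. a window of nelt
   consecutive elements taken from the concatenation op0:op1 starting
   at `location'.  For V4SImode a selector of {1,2,3,4} becomes a VEXT
   with offset 1, producing {a1,a2,a3,b0}.  The names below are local
   to this example.  */
#if 0
/* Scalar model of vext.32 with a 4-element vector width.  */
static void
vext4 (const int *a, const int *b, int *out, unsigned loc)
{
  for (unsigned i = 0; i < 4; i++)
    {
      unsigned idx = i + loc;          /* index into the a:b concatenation */
      out[i] = (idx < 4) ? a[idx] : b[idx - 4];
    }
}
#endif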
30835 /* The NEON VTBL instruction is a fully variable permutation that's even
30836 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30837 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30838 can do slightly better by expanding this as a constant where we don't
30839 have to apply a mask. */
30841 static bool
30842 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30844 rtx rperm[MAX_VECT_LEN], sel;
30845 enum machine_mode vmode = d->vmode;
30846 unsigned int i, nelt = d->nelt;
30848 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30849 numbering of elements for big-endian, we must reverse the order. */
30850 if (BYTES_BIG_ENDIAN)
30851 return false;
30853 if (d->testing_p)
30854 return true;
30856 /* Generic code will try constant permutation twice. Once with the
30857 original mode and again with the elements lowered to QImode.
30858 So wait and don't do the selector expansion ourselves. */
30859 if (vmode != V8QImode && vmode != V16QImode)
30860 return false;
30862 for (i = 0; i < nelt; ++i)
30863 rperm[i] = GEN_INT (d->perm[i]);
30864 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30865 sel = force_reg (vmode, sel);
30867 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30868 return true;
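/* Illustrative sketch (not part of GCC): VEC_PERM_EXPR masks the
   selector with (2 * nelt - 1), while VTBL returns zero for any
   out-of-range index.  For a compile-time-constant selector the
   indices are already known to be in range, so the selector can be
   emitted directly without the masking step.  A scalar comparison of
   the two behaviours; all names are local to this example.  */
#if 0
#include <stdint.h>

/* VEC_PERM_EXPR semantics: the selector is masked.  */
static uint8_t
perm_expr_lane (const uint8_t *ab, unsigned nelt2, uint8_t sel)
{
  return ab[sel & (nelt2 - 1)];
}

/* VTBL semantics: out-of-range selector lanes read as zero.  */
static uint8_t
vtbl_lane (const uint8_t *ab, unsigned nelt2, uint8_t sel)
{
  return sel < nelt2 ? ab[sel] : 0;
}
#endif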
30871 static bool
30872 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30874 /* Check if the input mask matches vext before reordering the
30875 operands. */
30876 if (TARGET_NEON)
30877 if (arm_evpc_neon_vext (d))
30878 return true;
30880 /* The pattern matching functions above are written to look for a small
30881 number to begin the sequence (0, 1, N/2). If we begin with an index
30882 from the second operand, we can swap the operands. */
30883 if (d->perm[0] >= d->nelt)
30885 unsigned i, nelt = d->nelt;
30886 rtx x;
30888 for (i = 0; i < nelt; ++i)
30889 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30891 x = d->op0;
30892 d->op0 = d->op1;
30893 d->op1 = x;
30896 if (TARGET_NEON)
30898 if (arm_evpc_neon_vuzp (d))
30899 return true;
30900 if (arm_evpc_neon_vzip (d))
30901 return true;
30902 if (arm_evpc_neon_vrev (d))
30903 return true;
30904 if (arm_evpc_neon_vtrn (d))
30905 return true;
30906 return arm_evpc_neon_vtbl (d);
30908 return false;
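/* Illustrative sketch (not part of GCC): if the selector begins with an
   index from the second operand, adding nelt modulo 2 * nelt flips every
   index between the two halves; applying the flipped selector to the
   swapped operands selects the same elements, but the mask now starts
   below nelt as the matchers above expect.  For example, with nelt == 4
   the selector {5,1,6,2} on (op0,op1) becomes {1,5,2,6} on (op1,op0).
   The name below is local to this example.  */
#if 0
/* Flip every selector index between the two input halves.  Applying
   the result to swapped operands selects the same elements.  */
static void
swap_perm_operands (unsigned char *perm, unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
  /* ... and the caller exchanges op0/op1 to match.  */
}
#endif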
30911 /* Expand a vec_perm_const pattern. */
30913 bool
30914 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30916 struct expand_vec_perm_d d;
30917 int i, nelt, which;
30919 d.target = target;
30920 d.op0 = op0;
30921 d.op1 = op1;
30923 d.vmode = GET_MODE (target);
30924 gcc_assert (VECTOR_MODE_P (d.vmode));
30925 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30926 d.testing_p = false;
30928 for (i = which = 0; i < nelt; ++i)
30930 rtx e = XVECEXP (sel, 0, i);
30931 int ei = INTVAL (e) & (2 * nelt - 1);
30932 which |= (ei < nelt ? 1 : 2);
30933 d.perm[i] = ei;
30936 switch (which)
30938 default:
30939 gcc_unreachable();
30941 case 3:
30942 d.one_vector_p = false;
30943 if (!rtx_equal_p (op0, op1))
30944 break;
30946 /* The elements of PERM do not suggest that only the first operand
30947 is used, but both operands are identical. Allow easier matching
30948 of the permutation by folding the permutation into the single
30949 input vector. */
30950 /* FALLTHRU */
30951 case 2:
30952 for (i = 0; i < nelt; ++i)
30953 d.perm[i] &= nelt - 1;
30954 d.op0 = op1;
30955 d.one_vector_p = true;
30956 break;
30958 case 1:
30959 d.op1 = op0;
30960 d.one_vector_p = true;
30961 break;
30964 return arm_expand_vec_perm_const_1 (&d);
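/* Illustrative sketch (not part of GCC): the `which' bitmask above
   records whether the selector references the first operand (bit 0),
   the second (bit 1), or both.  With nelt == 4, {0,1,2,3} gives
   which == 1, {4,5,6,7} gives which == 2 (and is folded down to
   {0,1,2,3} applied to op1 alone), and {0,5,2,7} gives which == 3.
   The name below is local to this example.  */
#if 0
/* Classify a selector exactly as the loop above does: bit 0 set if any
   index selects from the first vector, bit 1 if any selects from the
   second.  */
static unsigned
classify_selector (const unsigned char *sel, unsigned nelt)
{
  unsigned which = 0;
  for (unsigned i = 0; i < nelt; i++)
    {
      unsigned ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt) ? 1 : 2;
    }
  return which;   /* 1, 2 or 3 */
}
#endif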
30967 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30969 static bool
30970 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30971 const unsigned char *sel)
30973 struct expand_vec_perm_d d;
30974 unsigned int i, nelt, which;
30975 bool ret;
30977 d.vmode = vmode;
30978 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30979 d.testing_p = true;
30980 memcpy (d.perm, sel, nelt);
30982 /* Categorize the set of elements in the selector. */
30983 for (i = which = 0; i < nelt; ++i)
30985 unsigned char e = d.perm[i];
30986 gcc_assert (e < 2 * nelt);
30987 which |= (e < nelt ? 1 : 2);
30990 /* For all elements from the second vector, fold them into the first. */
30991 if (which == 2)
30992 for (i = 0; i < nelt; ++i)
30993 d.perm[i] -= nelt;
30995 /* Check whether the mask can be applied to the vector type. */
30996 d.one_vector_p = (which != 3);
30998 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30999 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31000 if (!d.one_vector_p)
31001 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31003 start_sequence ();
31004 ret = arm_expand_vec_perm_const_1 (&d);
31005 end_sequence ();
31007 return ret;
31010 bool
31011 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31013 /* If we are soft float and either have ldrd or the mode fits
31014 in a single word, then all auto increment forms are ok. */
31015 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31016 return true;
31018 switch (code)
31020 /* Post-increment and pre-decrement are supported for all instruction
31021 forms, except that pre-decrement is not available for vector forms. */
31022 case ARM_POST_INC:
31023 case ARM_PRE_DEC:
31024 if (VECTOR_MODE_P (mode))
31026 if (code != ARM_PRE_DEC)
31027 return true;
31028 else
31029 return false;
31032 return true;
31034 case ARM_POST_DEC:
31035 case ARM_PRE_INC:
31036 /* Without LDRD, and with a mode size greater than the
31037 word size, there is no point in auto-incrementing
31038 because ldm and stm will not have these forms. */
31039 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31040 return false;
31042 /* Vector and floating point modes do not support
31043 these auto increment forms. */
31044 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31045 return false;
31047 return true;
31049 default:
31050 return false;
31054 return false;
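/* Illustrative sketch (not part of GCC): a condensed restatement of the
   rules above, ignoring the initial soft-float shortcut.  Post-increment
   is always accepted; pre-decrement is accepted for everything except
   vector modes; pre-increment and post-decrement need LDRD (or a
   single-word access) and are never accepted for vector or floating-point
   modes.  All names below are local to this example.  */
#if 0
#include <stdbool.h>

enum ex_autoinc_kind { EX_POST_INC, EX_PRE_DEC, EX_POST_DEC, EX_PRE_INC };

static bool
ex_autoinc_ok (enum ex_autoinc_kind kind, bool is_vector, bool is_float,
               bool have_ldrd, unsigned mode_size)
{
  switch (kind)
    {
    case EX_POST_INC:
      return true;                      /* always available */
    case EX_PRE_DEC:
      return !is_vector;                /* not for vector modes */
    case EX_POST_DEC:
    case EX_PRE_INC:
      if (!have_ldrd && mode_size > 4)  /* no ldm/stm forms for these */
        return false;
      return !(is_float || is_vector);
    }
  return false;
}
#endif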
31057 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31058 on ARM, since we know that shifts by negative amounts are no-ops.
31059 Additionally, the default expansion code is not available or suitable
31060 for post-reload insn splits (this can occur when the register allocator
31061 chooses not to do a shift in NEON).
31063 This function is used in both initial expand and post-reload splits, and
31064 handles all kinds of 64-bit shifts.
31066 Input requirements:
31067 - It is safe for the input and output to be the same register, but
31068 early-clobber rules apply for the shift amount and scratch registers.
31069 - Shift by register requires both scratch registers. In all other cases
31070 the scratch registers may be NULL.
31071 - Ashiftrt by a register also clobbers the CC register. */
31072 void
31073 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31074 rtx amount, rtx scratch1, rtx scratch2)
31076 rtx out_high = gen_highpart (SImode, out);
31077 rtx out_low = gen_lowpart (SImode, out);
31078 rtx in_high = gen_highpart (SImode, in);
31079 rtx in_low = gen_lowpart (SImode, in);
31081 /* Terminology:
31082 in = the register pair containing the input value.
31083 out = the destination register pair.
31084 up = the high- or low-part of each pair.
31085 down = the opposite part to "up".
31086 In a shift, we can consider bits to shift from "up"-stream to
31087 "down"-stream, so in a left-shift "up" is the low-part and "down"
31088 is the high-part of each register pair. */
31090 rtx out_up = code == ASHIFT ? out_low : out_high;
31091 rtx out_down = code == ASHIFT ? out_high : out_low;
31092 rtx in_up = code == ASHIFT ? in_low : in_high;
31093 rtx in_down = code == ASHIFT ? in_high : in_low;
31095 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31096 gcc_assert (out
31097 && (REG_P (out) || GET_CODE (out) == SUBREG)
31098 && GET_MODE (out) == DImode);
31099 gcc_assert (in
31100 && (REG_P (in) || GET_CODE (in) == SUBREG)
31101 && GET_MODE (in) == DImode);
31102 gcc_assert (amount
31103 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31104 && GET_MODE (amount) == SImode)
31105 || CONST_INT_P (amount)));
31106 gcc_assert (scratch1 == NULL
31107 || (GET_CODE (scratch1) == SCRATCH)
31108 || (GET_MODE (scratch1) == SImode
31109 && REG_P (scratch1)));
31110 gcc_assert (scratch2 == NULL
31111 || (GET_CODE (scratch2) == SCRATCH)
31112 || (GET_MODE (scratch2) == SImode
31113 && REG_P (scratch2)));
31114 gcc_assert (!REG_P (out) || !REG_P (amount)
31115 || !HARD_REGISTER_P (out)
31116 || (REGNO (out) != REGNO (amount)
31117 && REGNO (out) + 1 != REGNO (amount)));
31119 /* Macros to make following code more readable. */
31120 #define SUB_32(DEST,SRC) \
31121 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31122 #define RSB_32(DEST,SRC) \
31123 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31124 #define SUB_S_32(DEST,SRC) \
31125 gen_addsi3_compare0 ((DEST), (SRC), \
31126 GEN_INT (-32))
31127 #define SET(DEST,SRC) \
31128 gen_rtx_SET (SImode, (DEST), (SRC))
31129 #define SHIFT(CODE,SRC,AMOUNT) \
31130 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31131 #define LSHIFT(CODE,SRC,AMOUNT) \
31132 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31133 SImode, (SRC), (AMOUNT))
31134 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31135 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31136 SImode, (SRC), (AMOUNT))
31137 #define ORR(A,B) \
31138 gen_rtx_IOR (SImode, (A), (B))
31139 #define BRANCH(COND,LABEL) \
31140 gen_arm_cond_branch ((LABEL), \
31141 gen_rtx_ ## COND (CCmode, cc_reg, \
31142 const0_rtx), \
31143 cc_reg)
31145 /* Shifts by register and shifts by constant are handled separately. */
31146 if (CONST_INT_P (amount))
31148 /* We have a shift-by-constant. */
31150 /* First, handle out-of-range shift amounts.
31151 In both cases we try to match the result an ARM instruction in a
31152 shift-by-register would give. This helps reduce execution
31153 differences between optimization levels, but it won't stop other
31154 parts of the compiler doing different things. This is "undefined
31155 behaviour", in any case. */
31156 if (INTVAL (amount) <= 0)
31157 emit_insn (gen_movdi (out, in));
31158 else if (INTVAL (amount) >= 64)
31160 if (code == ASHIFTRT)
31162 rtx const31_rtx = GEN_INT (31);
31163 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31164 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31166 else
31167 emit_insn (gen_movdi (out, const0_rtx));
31170 /* Now handle valid shifts. */
31171 else if (INTVAL (amount) < 32)
31173 /* Shifts by a constant less than 32. */
31174 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31176 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31177 emit_insn (SET (out_down,
31178 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31179 out_down)));
31180 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31182 else
31184 /* Shifts by a constant greater than 31. */
31185 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31187 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31188 if (code == ASHIFTRT)
31189 emit_insn (gen_ashrsi3 (out_up, in_up,
31190 GEN_INT (31)));
31191 else
31192 emit_insn (SET (out_up, const0_rtx));
31195 else
31197 /* We have a shift-by-register. */
31198 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31200 /* This alternative requires the scratch registers. */
31201 gcc_assert (scratch1 && REG_P (scratch1));
31202 gcc_assert (scratch2 && REG_P (scratch2));
31204 /* We will need the values "amount-32" and "32-amount" later.
31205 Swapping them around now allows the later code to be more general. */
31206 switch (code)
31208 case ASHIFT:
31209 emit_insn (SUB_32 (scratch1, amount));
31210 emit_insn (RSB_32 (scratch2, amount));
31211 break;
31212 case ASHIFTRT:
31213 emit_insn (RSB_32 (scratch1, amount));
31214 /* Also set CC = amount > 32. */
31215 emit_insn (SUB_S_32 (scratch2, amount));
31216 break;
31217 case LSHIFTRT:
31218 emit_insn (RSB_32 (scratch1, amount));
31219 emit_insn (SUB_32 (scratch2, amount));
31220 break;
31221 default:
31222 gcc_unreachable ();
31225 /* Emit code like this:
31227 arithmetic-left:
31228 out_down = in_down << amount;
31229 out_down = (in_up << (amount - 32)) | out_down;
31230 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31231 out_up = in_up << amount;
31233 arithmetic-right:
31234 out_down = in_down >> amount;
31235 out_down = (in_up << (32 - amount)) | out_down;
31236 if (amount >= 32)
31237 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31238 out_up = in_up >> amount;
31240 logical-right:
31241 out_down = in_down >> amount;
31242 out_down = (in_up << (32 - amount)) | out_down;
31243 if (amount >= 32)
31244 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31245 out_up = in_up >> amount;
31247 The ARM and Thumb2 variants are the same but implemented slightly
31248 differently. If this were only called during expand we could just
31249 use the Thumb2 case and let combine do the right thing, but this
31250 can also be called from post-reload splitters. */
31252 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31254 if (!TARGET_THUMB2)
31256 /* Emit code for ARM mode. */
31257 emit_insn (SET (out_down,
31258 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31259 if (code == ASHIFTRT)
31261 rtx done_label = gen_label_rtx ();
31262 emit_jump_insn (BRANCH (LT, done_label));
31263 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31264 out_down)));
31265 emit_label (done_label);
31267 else
31268 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31269 out_down)));
31271 else
31273 /* Emit code for Thumb2 mode.
31274 Thumb2 can't do shift and or in one insn. */
31275 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31276 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31278 if (code == ASHIFTRT)
31280 rtx done_label = gen_label_rtx ();
31281 emit_jump_insn (BRANCH (LT, done_label));
31282 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31283 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31284 emit_label (done_label);
31286 else
31288 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31289 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31293 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31296 #undef SUB_32
31297 #undef RSB_32
31298 #undef SUB_S_32
31299 #undef SET
31300 #undef SHIFT
31301 #undef LSHIFT
31302 #undef REV_LSHIFT
31303 #undef ORR
31304 #undef BRANCH
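/* Illustrative sketch (not part of GCC): a scalar model of the
   shift-by-register sequence above for a 64-bit logical right shift,
   built from the same three ORR steps on 32-bit halves.  The helper
   lsr32/lsl32 model the ARM register-shift rule that amounts of 32 or
   more produce zero, which is what makes the third step harmless for
   amounts below 32.  All names are local to this example.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* ARM register-specified LSR: amounts of 32..255 yield zero.  */
static uint32_t
lsr32 (uint32_t x, uint32_t amount)
{
  return (amount & 0xff) < 32 ? x >> (amount & 0xff) : 0;
}

/* ARM register-specified LSL: amounts of 32..255 yield zero.  */
static uint32_t
lsl32 (uint32_t x, uint32_t amount)
{
  return (amount & 0xff) < 32 ? x << (amount & 0xff) : 0;
}

/* 64-bit logical right shift from 32-bit halves, mirroring the emitted
   sequence: scratch1 = 32 - amount, scratch2 = amount - 32.  */
static uint64_t
lshr64 (uint32_t in_up, uint32_t in_down, uint32_t amount)
{
  uint32_t out_down = lsr32 (in_down, amount);
  out_down |= lsl32 (in_up, 32 - amount);       /* zero when amount >= 32 */
  out_down |= lsr32 (in_up, amount - 32);       /* zero when amount < 32 */
  uint32_t out_up = lsr32 (in_up, amount);
  return ((uint64_t) out_up << 32) | out_down;
}

static void
example (void)
{
  assert (lshr64 (0x12345678, 0x9abcdef0, 4)  == 0x0123456789abcdefULL);
  assert (lshr64 (0x12345678, 0x9abcdef0, 36) == 0x0000000001234567ULL);
}
#endif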
31308 /* Returns true if this is a valid comparison operation and puts
31309 the operands into a form that is valid. */
31310 bool
31311 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31313 enum rtx_code code = GET_CODE (*comparison);
31314 int code_int;
31315 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31316 ? GET_MODE (*op2) : GET_MODE (*op1);
31318 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31320 if (code == UNEQ || code == LTGT)
31321 return false;
31323 code_int = (int)code;
31324 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31325 PUT_CODE (*comparison, (enum rtx_code)code_int);
31327 switch (mode)
31329 case SImode:
31330 if (!arm_add_operand (*op1, mode))
31331 *op1 = force_reg (mode, *op1);
31332 if (!arm_add_operand (*op2, mode))
31333 *op2 = force_reg (mode, *op2);
31334 return true;
31336 case DImode:
31337 if (!cmpdi_operand (*op1, mode))
31338 *op1 = force_reg (mode, *op1);
31339 if (!cmpdi_operand (*op2, mode))
31340 *op2 = force_reg (mode, *op2);
31341 return true;
31343 case SFmode:
31344 case DFmode:
31345 if (!arm_float_compare_operand (*op1, mode))
31346 *op1 = force_reg (mode, *op1);
31347 if (!arm_float_compare_operand (*op2, mode))
31348 *op2 = force_reg (mode, *op2);
31349 return true;
31350 default:
31351 break;
31354 return false;
31358 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31360 static unsigned HOST_WIDE_INT
31361 arm_asan_shadow_offset (void)
31363 return (unsigned HOST_WIDE_INT) 1 << 29;
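/* Illustrative sketch (not part of GCC): with AddressSanitizer's standard
   1/8 shadow mapping, the shadow byte for an address lives at
   (addr >> 3) + offset, so returning 1 << 29 places the ARM shadow region
   at 0x20000000.  A worked computation; names are local to this example.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* Standard AddressSanitizer mapping: one shadow byte per 8 bytes of
   application memory, displaced by the target's shadow offset.  */
static uintptr_t
asan_shadow_addr (uintptr_t addr)
{
  return (addr >> 3) + ((uintptr_t) 1 << 29);
}

static void
example (void)
{
  /* An application address of 0x40000000 maps to shadow 0x28000000.  */
  assert (asan_shadow_addr (0x40000000u) == 0x28000000u);
}
#endif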
31367 /* This is a temporary fix for PR60655. Ideally we need
31368 to handle most of these cases in the generic part but
31369 currently we reject minus (..) (sym_ref). We try to
31370 ameliorate the case with minus (sym_ref1) (sym_ref2)
31371 where they are in the same section. */
31373 static bool
31374 arm_const_not_ok_for_debug_p (rtx p)
31376 tree decl_op0 = NULL;
31377 tree decl_op1 = NULL;
31379 if (GET_CODE (p) == MINUS)
31381 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31383 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31384 if (decl_op1
31385 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31386 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31388 if ((TREE_CODE (decl_op1) == VAR_DECL
31389 || TREE_CODE (decl_op1) == CONST_DECL)
31390 && (TREE_CODE (decl_op0) == VAR_DECL
31391 || TREE_CODE (decl_op0) == CONST_DECL))
31392 return (get_variable_section (decl_op1, false)
31393 != get_variable_section (decl_op0, false));
31395 if (TREE_CODE (decl_op1) == LABEL_DECL
31396 && TREE_CODE (decl_op0) == LABEL_DECL)
31397 return (DECL_CONTEXT (decl_op1)
31398 != DECL_CONTEXT (decl_op0));
31401 return true;
31405 return false;
31408 #include "gt-arm.h"