/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2017 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This file should be included last.  */
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
  /* Enumerators for the individual saved frame registers elided;
     number_of_ia64_frame_regs is their count.  */
  number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */
  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};
/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
				   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
				const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
			      const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
				       const_tree, bool);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
					      const unsigned char *sel);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
static bool ia64_can_change_mode_class (machine_mode, machine_mode,
					reg_class_t);
#define MAX_VECT_LEN 8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,              0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel

/* ??? Investigate.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class
struct gcc_target targetm = TARGET_INITIALIZER;
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
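
/* Illustrative note (not from the original source): the +/-2MB window above
   corresponds to the 22-bit signed immediate of the ia64 addl instruction,
   which is what small-address references are materialized with.  */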
static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;

  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  tree arg, decl = *node;

  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    addr_area = ADDR_AREA_SMALL;
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
      *no_add_attrs = true;

  switch (TREE_CODE (decl))
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	  && !TREE_STATIC (decl))
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
	  *no_add_attrs = true;
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored",
      *no_add_attrs = true;
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
    }
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  attr = lookup_attribute ("common_object", attr);
      tree id = TREE_VALUE (TREE_VALUE (attr));

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
	   size, align / BITS_PER_UNIT);
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
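
/* Worked example (illustrative, not from the original source): for
   rop = 0x0ff0 and rshift = 4, op >>= shift leaves 0xff and
   exact_log2 (0xff + 1) == 8, i.e. an 8-bit dep.z field.  A non-contiguous
   mask such as 0x0f0f shifted by the same 4 leaves 0xf0; 0xf1 is not a
   power of two, so exact_log2 returns -1 and the match fails.  */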
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
			   rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;

  return false;
}
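
/* Illustrative summary (not from the original source): the accepted forms
   correspond to the ia64 addressing modes

       ld8 r2 = [r3]		;; plain register
       ld8 r2 = [r3], 8		;; post-increment by constant
       ld8 r2 = [r3], r4	;; post-modify by register

   there is no base+displacement load on ia64, which is why only register
   and post-modified addresses are legitimate here.  */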
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return satisfies_constraint_G (x);

      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	}

      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

static bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465  */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;

	  src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
      emit_insn (gen_rtx_SET (dest, tmp));
    }
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();

      if (GET_MODE (op0) != Pmode)
      emit_libcall_block (insns, op0, tga_ret, op1);

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (op0, op1));
      emit_insn (gen_adddi3 (op0, tmp, op0));
      emit_insn (gen_add_dtprel (op0, op1, tmp));

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;
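
      /* Illustrative note (not from the original source): the two lines
	 above split ADDEND into a sign-extended 14-bit low part and a
	 remainder, e.g. addend = 0x3000 gives addend_lo = -0x1000 and
	 addend_hi = 0x4000, so addend_hi is a multiple of 0x4000 while
	 addend_lo fits the signed 14-bit range [-0x2000, 0x1fff].  */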
      op1 = plus_constant (Pmode, op1, addend_hi);

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      emit_insn (gen_load_tprel (op0, op1));
      emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
      emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));

    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  op1 = plus_constant (mode, sym, addend_hi);
	}

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	}

	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	}
    }
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */
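
/* Illustrative sketch (not from the original source): for a TImode load
   through the pointer in r14, the generated pair is roughly

       ld8 rX = [r14], 8	;; low word, r14 post-incremented
       ld8 rY = [r14], -8	;; high word, r14 restored

   and the second post-decrement is omitted when DEAD says the pointer is
   not needed afterwards.  */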
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);

      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}

      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);

	    /* Reversal requires a pre-increment, which can only
	       be done as a separate insn.  */
	    emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
	    out[1] = adjust_address (in, DImode, 0);

	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);

	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);

	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       extra add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		  }
	      }
	  }
      }
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
	dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
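
/* Illustrative note (not from the original source): the REG_INC notes added
   above record that the base register is auto-modified by the memory
   reference; later passes such as scheduling rely on those notes to track
   the register's value correctly.  */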
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
   from SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   emit_move_insn.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      rtx in[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      gcc_assert (GET_CODE (operands[0]) == MEM);

      /* Don't word-swap when writing out the value.  */
      in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
      in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

      emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
      emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
	  QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
      enum rtx_code ncode;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given a NaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered () from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     a NaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* Unordered relational operators do not raise FP_INVALID
	     when given a NaN operand.  */
	case UNLT:      magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
	case UNLE:      magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	case UNGT:      magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
	case UNGE:      magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	  /* Not supported.  */
	default: gcc_unreachable ();
	}

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
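
      /* Illustrative example (not from the original source): for a TFmode
	 `a < b' the call built above is effectively
	 _U_Qfcmp (a, b, QCMP_LT|QCMP_INV), and the comparison result is
	 formed by testing the returned integer with ncode (NE) against
	 zero.  */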
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
						   ret, const0_rtx)));

      rtx_insn *insns = get_insns ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
}
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
        {
        case E_V2SImode:
          {
            rtx t1, t2, mask;

            /* Subtract (-(INT MAX) - 1) from both operands to make
               them signed.  */
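            /* Biasing both operands by 0x80000000 flips their sign bits, so
               the unsigned ordering of the original values matches the signed
               ordering of the biased values and the signed GT pattern can be
               used.  */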
            mask = gen_int_mode (0x80000000, SImode);
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
            mask = force_reg (mode, mask);
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t1, op0, mask));
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t2, op1, mask));
            op0 = t1;
            op1 = t2;
            code = GT;
          }
          break;

        case E_V8QImode:
        case E_V4HImode:
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));

          code = EQ;
          op0 = x;
          op1 = CONST0_RTX (mode);
          negate = !negate;
          break;

        default:
          gcc_unreachable ();
        }
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (dest, x));

  return negate;
}
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}
/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[3])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
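  /* For unsigned halfwords, max (a, b) is the saturating difference
     (a -us b) added back to b: the difference is zero when b >= a and
     a - b otherwise, so the sum is max (a, b) in both cases.  */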
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Return a vector of the sign-extension of VEC.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

  if (unsignedp)
    return zero;
  else
    {
      rtx sign = gen_reg_rtx (mode);
      bool neg;

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
      gcc_assert (!neg);

      return sign;
    }
}

/* Emit an integral vector unpack operation.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}

/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  machine_mode wmode;
  rtx l, h, t, sign;

  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (!retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (!retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

static int
get_reg (enum ia64_frame_regs r)
{
  reg_emitted (r);
  return current_frame_info.r[r];
}

static bool
is_emitted (int regno)
{
  unsigned int r;

  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return true;
  return false;
}
static void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    {
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
    }
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
2279 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
2280 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
2283 bool is_desc
= false;
2285 /* If we find we're calling through a register, then we're actually
2286 calling through a descriptor, so load up the values. */
2287 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
2292 /* ??? We are currently constrained to *not* use peep2, because
2293 we can legitimately change the global lifetime of the GP
2294 (in the form of killing where previously live). This is
2295 because a call through a descriptor doesn't use the previous
2296 value of the GP, while a direct call does, and we do not
2297 commit to either form until the split here.
2299 That said, this means that we lack precise life info for
2300 whether ADDR is dead after this call. This is not terribly
2301 important, since we can fix things up essentially for free
2302 with the POST_DEC below, but it's nice to not use it when we
2303 can immediately tell it's not necessary. */
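      /* In practice ADDR is treated as dead when it is not an argument
         register and the call is a sibcall, never returns, or ADDR lives
         in a call-clobbered register anyway.  */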
2304 addr_dead_p
= ((noreturn_p
|| sibcall_p
2305 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
2307 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
2309 /* Load the code address into scratch_b. */
2310 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
2311 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2312 emit_move_insn (scratch_r
, tmp
);
2313 emit_move_insn (scratch_b
, scratch_r
);
2315 /* Load the GP address. If ADDR is not dead here, then we must
2316 revert the change made above via the POST_INCREMENT. */
2318 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
2321 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2322 emit_move_insn (pic_offset_table_rtx
, tmp
);
2329 insn
= gen_sibcall_nogp (addr
);
2331 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
2333 insn
= gen_call_nogp (addr
, retaddr
);
2334 emit_call_insn (insn
);
2336 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
2340 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2342 This differs from the generic code in that we know about the zero-extending
2343 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2344 also know that ld.acq+cmpxchg.rel equals a full barrier.
2346 The loop we want to generate looks like
2351 new_reg = cmp_reg op val;
2352 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2353 if (cmp_reg != old_reg)
2356 Note that we only do the plain load from memory once. Subsequent
2357 iterations use the value loaded by the compare-and-swap pattern. */
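/* Note that ar.ccv holds the value the cmpxchg must find in memory (the
   copy saved in OLD_REG below), while CMP_REG receives the value that was
   actually there; the loop repeats only when the two differ, i.e. when
   another agent modified MEM between the load and the cmpxchg.  */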
2360 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
2361 rtx old_dst
, rtx new_dst
, enum memmodel model
)
2363 machine_mode mode
= GET_MODE (mem
);
2364 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
2365 enum insn_code icode
;
2367 /* Special case for using fetchadd. */
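  /* fetchadd_operand accepts only the small set of immediates that the
     fetchadd4/fetchadd8 instructions can encode (roughly -16, -8, -4, -1,
     1, 4, 8 and 16 per the IA-64 ISA); everything else falls through to
     the cmpxchg loop below.  */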
2368 if ((mode
== SImode
|| mode
== DImode
)
2369 && (code
== PLUS
|| code
== MINUS
)
2370 && fetchadd_operand (val
, mode
))
2373 val
= GEN_INT (-INTVAL (val
));
2376 old_dst
= gen_reg_rtx (mode
);
2380 case MEMMODEL_ACQ_REL
:
2381 case MEMMODEL_SEQ_CST
:
2382 case MEMMODEL_SYNC_SEQ_CST
:
2383 emit_insn (gen_memory_barrier ());
2385 case MEMMODEL_RELAXED
:
2386 case MEMMODEL_ACQUIRE
:
2387 case MEMMODEL_SYNC_ACQUIRE
:
2388 case MEMMODEL_CONSUME
:
2390 icode
= CODE_FOR_fetchadd_acq_si
;
2392 icode
= CODE_FOR_fetchadd_acq_di
;
2394 case MEMMODEL_RELEASE
:
2395 case MEMMODEL_SYNC_RELEASE
:
2397 icode
= CODE_FOR_fetchadd_rel_si
;
2399 icode
= CODE_FOR_fetchadd_rel_di
;
2406 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
2410 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
2412 if (new_reg
!= new_dst
)
2413 emit_move_insn (new_dst
, new_reg
);
2418 /* Because of the volatile mem read, we get an ld.acq, which is the
2419 front half of the full barrier. The end half is the cmpxchg.rel.
2420 For relaxed and release memory models, we don't need this. But we
2421 also don't bother trying to prevent it either. */
2422 gcc_assert (is_mm_relaxed (model
) || is_mm_release (model
)
2423 || MEM_VOLATILE_P (mem
));
2425 old_reg
= gen_reg_rtx (DImode
);
2426 cmp_reg
= gen_reg_rtx (DImode
);
2427 label
= gen_label_rtx ();
2431 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
2432 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
2435 emit_move_insn (cmp_reg
, mem
);
2439 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
2440 emit_move_insn (old_reg
, cmp_reg
);
2441 emit_move_insn (ar_ccv
, cmp_reg
);
2444 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
2449 new_reg
= expand_simple_binop (DImode
, AND
, new_reg
, val
, NULL_RTX
,
2450 true, OPTAB_DIRECT
);
2451 new_reg
= expand_simple_unop (DImode
, code
, new_reg
, NULL_RTX
, true);
2454 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
2455 true, OPTAB_DIRECT
);
2458 new_reg
= gen_lowpart (mode
, new_reg
);
2460 emit_move_insn (new_dst
, new_reg
);
2464 case MEMMODEL_RELAXED
:
2465 case MEMMODEL_ACQUIRE
:
2466 case MEMMODEL_SYNC_ACQUIRE
:
2467 case MEMMODEL_CONSUME
:
2470 case E_QImode
: icode
= CODE_FOR_cmpxchg_acq_qi
; break;
2471 case E_HImode
: icode
= CODE_FOR_cmpxchg_acq_hi
; break;
2472 case E_SImode
: icode
= CODE_FOR_cmpxchg_acq_si
; break;
2473 case E_DImode
: icode
= CODE_FOR_cmpxchg_acq_di
; break;
2479 case MEMMODEL_RELEASE
:
2480 case MEMMODEL_SYNC_RELEASE
:
2481 case MEMMODEL_ACQ_REL
:
2482 case MEMMODEL_SEQ_CST
:
2483 case MEMMODEL_SYNC_SEQ_CST
:
2486 case E_QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
2487 case E_HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
2488 case E_SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
2489 case E_DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
2499 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
2501 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, NE
, NULL
, DImode
, true, label
);
/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
/* Globalize a declaration.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}
2564 /* Helper function for ia64_compute_frame_size: find an appropriate general
2565 register to spill some special register to. SPECIAL_SPILL_MASK contains
2566 bits in GR0 to GR31 that have already been allocated by this routine.
2567 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2570 find_gr_spill (enum ia64_frame_regs r
, int try_locals
)
2574 if (emitted_frame_related_regs
[r
] != 0)
2576 regno
= emitted_frame_related_regs
[r
];
2577 if (regno
>= LOC_REG (0) && regno
< LOC_REG (80 - frame_pointer_needed
)
2578 && current_frame_info
.n_local_regs
< regno
- LOC_REG (0) + 1)
2579 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2580 else if (crtl
->is_leaf
2581 && regno
>= GR_REG (1) && regno
<= GR_REG (31))
2582 current_frame_info
.gr_used_mask
|= 1 << regno
;
2587 /* If this is a leaf function, first try an otherwise unused
2588 call-clobbered register. */
2591 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2592 if (! df_regs_ever_live_p (regno
)
2593 && call_used_regs
[regno
]
2594 && ! fixed_regs
[regno
]
2595 && ! global_regs
[regno
]
2596 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0
2597 && ! is_emitted (regno
))
2599 current_frame_info
.gr_used_mask
|= 1 << regno
;
2606 regno
= current_frame_info
.n_local_regs
;
2607 /* If there is a frame pointer, then we can't use loc79, because
2608 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2609 reg_name switching code in ia64_expand_prologue. */
2610 while (regno
< (80 - frame_pointer_needed
))
2611 if (! is_emitted (LOC_REG (regno
++)))
2613 current_frame_info
.n_local_regs
= regno
;
2614 return LOC_REG (regno
- 1);
2618 /* Failed to find a general register to spill to. Must use stack. */
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */
static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  gcc_unreachable ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = REG_NREGS (reg);
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
2671 /* Returns the number of bytes offset between the frame pointer and the stack
2672 pointer for the current function. SIZE is the number of bytes of space
2673 needed for local variables. */
2676 ia64_compute_frame_size (HOST_WIDE_INT size
)
2678 HOST_WIDE_INT total_size
;
2679 HOST_WIDE_INT spill_size
= 0;
2680 HOST_WIDE_INT extra_spill_size
= 0;
2681 HOST_WIDE_INT pretend_args_size
;
2684 int spilled_gr_p
= 0;
2685 int spilled_fr_p
= 0;
2691 if (current_frame_info
.initialized
)
2694 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2695 CLEAR_HARD_REG_SET (mask
);
2697 /* Don't allocate scratches to the return register. */
2698 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2700 /* Don't allocate scratches to the EH scratch registers. */
2701 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2702 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2703 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2704 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2706 /* Static stack checking uses r2 and r3. */
2707 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
2708 current_frame_info
.gr_used_mask
|= 0xc;
2710 /* Find the size of the register stack frame. We have only 80 local
2711 registers, because we reserve 8 for the inputs and 8 for the
2714 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2715 since we'll be adjusting that down later. */
2716 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2717 for (; regno
>= LOC_REG (0); regno
--)
2718 if (df_regs_ever_live_p (regno
) && !is_emitted (regno
))
2720 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2722 /* For functions marked with the syscall_linkage attribute, we must mark
2723 all eight input registers as in use, so that locals aren't visible to
2726 if (cfun
->machine
->n_varargs
> 0
2727 || lookup_attribute ("syscall_linkage",
2728 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2729 current_frame_info
.n_input_regs
= 8;
2732 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2733 if (df_regs_ever_live_p (regno
))
2735 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2738 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2739 if (df_regs_ever_live_p (regno
))
2741 i
= regno
- OUT_REG (0) + 1;
2743 #ifndef PROFILE_HOOK
2744 /* When -p profiling, we need one output register for the mcount argument.
2745 Likewise for -a profiling for the bb_init_func argument. For -ax
2746 profiling, we need two output registers for the two bb_init_trace_func
2751 current_frame_info
.n_output_regs
= i
;
2753 /* ??? No rotating register support yet. */
2754 current_frame_info
.n_rotate_regs
= 0;
2756 /* Discover which registers need spilling, and how much room that
2757 will take. Begin with floating point and general registers,
2758 which will always wind up on the stack. */
2760 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2761 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2763 SET_HARD_REG_BIT (mask
, regno
);
2769 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2770 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2772 SET_HARD_REG_BIT (mask
, regno
);
2778 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2779 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2781 SET_HARD_REG_BIT (mask
, regno
);
2786 /* Now come all special registers that might get saved in other
2787 general registers. */
2789 if (frame_pointer_needed
)
2791 current_frame_info
.r
[reg_fp
] = find_gr_spill (reg_fp
, 1);
2792 /* If we did not get a register, then we take LOC79. This is guaranteed
2793 to be free, even if regs_ever_live is already set, because this is
2794 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2795 as we don't count loc79 above. */
2796 if (current_frame_info
.r
[reg_fp
] == 0)
2798 current_frame_info
.r
[reg_fp
] = LOC_REG (79);
2799 current_frame_info
.n_local_regs
= LOC_REG (79) - LOC_REG (0) + 1;
2803 if (! crtl
->is_leaf
)
2805 /* Emit a save of BR0 if we call other functions. Do this even
2806 if this function doesn't return, as EH depends on this to be
2807 able to unwind the stack. */
2808 SET_HARD_REG_BIT (mask
, BR_REG (0));
2810 current_frame_info
.r
[reg_save_b0
] = find_gr_spill (reg_save_b0
, 1);
2811 if (current_frame_info
.r
[reg_save_b0
] == 0)
2813 extra_spill_size
+= 8;
2817 /* Similarly for ar.pfs. */
2818 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2819 current_frame_info
.r
[reg_save_ar_pfs
] = find_gr_spill (reg_save_ar_pfs
, 1);
2820 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2822 extra_spill_size
+= 8;
2826 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2827 registers are clobbered, so we fall back to the stack. */
2828 current_frame_info
.r
[reg_save_gp
]
2829 = (cfun
->calls_setjmp
? 0 : find_gr_spill (reg_save_gp
, 1));
2830 if (current_frame_info
.r
[reg_save_gp
] == 0)
2832 SET_HARD_REG_BIT (mask
, GR_REG (1));
2839 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs
[BR_REG (0)])
2841 SET_HARD_REG_BIT (mask
, BR_REG (0));
2842 extra_spill_size
+= 8;
2846 if (df_regs_ever_live_p (AR_PFS_REGNUM
))
2848 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2849 current_frame_info
.r
[reg_save_ar_pfs
]
2850 = find_gr_spill (reg_save_ar_pfs
, 1);
2851 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2853 extra_spill_size
+= 8;
2859 /* Unwind descriptor hackery: things are most efficient if we allocate
2860 consecutive GR save registers for RP, PFS, FP in that order. However,
2861 it is absolutely critical that FP get the only hard register that's
2862 guaranteed to be free, so we allocated it first. If all three did
2863 happen to be allocated hard regs, and are consecutive, rearrange them
2864 into the preferred order now.
2866 If we have already emitted code for any of those registers,
2867 then it's already too late to change. */
2868 min_regno
= MIN (current_frame_info
.r
[reg_fp
],
2869 MIN (current_frame_info
.r
[reg_save_b0
],
2870 current_frame_info
.r
[reg_save_ar_pfs
]));
2871 max_regno
= MAX (current_frame_info
.r
[reg_fp
],
2872 MAX (current_frame_info
.r
[reg_save_b0
],
2873 current_frame_info
.r
[reg_save_ar_pfs
]));
2875 && min_regno
+ 2 == max_regno
2876 && (current_frame_info
.r
[reg_fp
] == min_regno
+ 1
2877 || current_frame_info
.r
[reg_save_b0
] == min_regno
+ 1
2878 || current_frame_info
.r
[reg_save_ar_pfs
] == min_regno
+ 1)
2879 && (emitted_frame_related_regs
[reg_save_b0
] == 0
2880 || emitted_frame_related_regs
[reg_save_b0
] == min_regno
)
2881 && (emitted_frame_related_regs
[reg_save_ar_pfs
] == 0
2882 || emitted_frame_related_regs
[reg_save_ar_pfs
] == min_regno
+ 1)
2883 && (emitted_frame_related_regs
[reg_fp
] == 0
2884 || emitted_frame_related_regs
[reg_fp
] == min_regno
+ 2))
2886 current_frame_info
.r
[reg_save_b0
] = min_regno
;
2887 current_frame_info
.r
[reg_save_ar_pfs
] = min_regno
+ 1;
2888 current_frame_info
.r
[reg_fp
] = min_regno
+ 2;
2891 /* See if we need to store the predicate register block. */
2892 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2893 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2895 if (regno
<= PR_REG (63))
2897 SET_HARD_REG_BIT (mask
, PR_REG (0));
2898 current_frame_info
.r
[reg_save_pr
] = find_gr_spill (reg_save_pr
, 1);
2899 if (current_frame_info
.r
[reg_save_pr
] == 0)
2901 extra_spill_size
+= 8;
2905 /* ??? Mark them all as used so that register renaming and such
2906 are free to use them. */
2907 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2908 df_set_regs_ever_live (regno
, true);
2911 /* If we're forced to use st8.spill, we're forced to save and restore
2912 ar.unat as well. The check for existing liveness allows inline asm
2913 to touch ar.unat. */
2914 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2915 || df_regs_ever_live_p (AR_UNAT_REGNUM
))
2917 df_set_regs_ever_live (AR_UNAT_REGNUM
, true);
2918 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2919 current_frame_info
.r
[reg_save_ar_unat
]
2920 = find_gr_spill (reg_save_ar_unat
, spill_size
== 0);
2921 if (current_frame_info
.r
[reg_save_ar_unat
] == 0)
2923 extra_spill_size
+= 8;
2928 if (df_regs_ever_live_p (AR_LC_REGNUM
))
2930 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2931 current_frame_info
.r
[reg_save_ar_lc
]
2932 = find_gr_spill (reg_save_ar_lc
, spill_size
== 0);
2933 if (current_frame_info
.r
[reg_save_ar_lc
] == 0)
2935 extra_spill_size
+= 8;
2940 /* If we have an odd number of words of pretend arguments written to
2941 the stack, then the FR save area will be unaligned. We round the
2942 size of this area up to keep things 16 byte aligned. */
2944 pretend_args_size
= IA64_STACK_ALIGN (crtl
->args
.pretend_args_size
);
2946 pretend_args_size
= crtl
->args
.pretend_args_size
;
2948 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2949 + crtl
->outgoing_args_size
);
2950 total_size
= IA64_STACK_ALIGN (total_size
);
2952 /* We always use the 16-byte scratch area provided by the caller, but
2953 if we are a leaf function, there's no one to which we need to provide
2954 a scratch area. However, if the function allocates dynamic stack space,
2955 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2956 so we need to cope. */
2957 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
2958 total_size
= MAX (0, total_size
- 16);
2960 current_frame_info
.total_size
= total_size
;
2961 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2962 current_frame_info
.spill_size
= spill_size
;
2963 current_frame_info
.extra_spill_size
= extra_spill_size
;
2964 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2965 current_frame_info
.n_spilled
= n_spilled
;
2966 current_frame_info
.initialized
= reload_completed
;
/* Worker function for TARGET_CAN_ELIMINATE.  */

static bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}
2977 /* Compute the initial difference between the specified pair of registers. */
2980 ia64_initial_elimination_offset (int from
, int to
)
2982 HOST_WIDE_INT offset
;
2984 ia64_compute_frame_size (get_frame_size ());
2987 case FRAME_POINTER_REGNUM
:
2990 case HARD_FRAME_POINTER_REGNUM
:
2991 offset
= -current_frame_info
.total_size
;
2992 if (!crtl
->is_leaf
|| cfun
->calls_alloca
)
2993 offset
+= 16 + crtl
->outgoing_args_size
;
2996 case STACK_POINTER_REGNUM
:
2998 if (!crtl
->is_leaf
|| cfun
->calls_alloca
)
2999 offset
+= 16 + crtl
->outgoing_args_size
;
3007 case ARG_POINTER_REGNUM
:
3008 /* Arguments start above the 16 byte save area, unless stdarg
3009 in which case we store through the 16 byte save area. */
3012 case HARD_FRAME_POINTER_REGNUM
:
3013 offset
= 16 - crtl
->args
.pretend_args_size
;
3016 case STACK_POINTER_REGNUM
:
3017 offset
= (current_frame_info
.total_size
3018 + 16 - crtl
->args
.pretend_args_size
);
3033 /* If there are more than a trivial number of register spills, we use
3034 two interleaved iterators so that we can get two memory references
3037 In order to simplify things in the prologue and epilogue expanders,
3038 we use helper functions to fix up the memory references after the
3039 fact with the appropriate offsets to a POST_MODIFY memory mode.
3040 The following data structure tracks the state of the two iterators
3041 while insns are being emitted. */
3043 struct spill_fill_data
3045 rtx_insn
*init_after
; /* point at which to emit initializations */
3046 rtx init_reg
[2]; /* initial base register */
3047 rtx iter_reg
[2]; /* the iterator registers */
3048 rtx
*prev_addr
[2]; /* address of last memory use */
3049 rtx_insn
*prev_insn
[2]; /* the insn corresponding to prev_addr */
3050 HOST_WIDE_INT prev_off
[2]; /* last offset */
3051 int n_iter
; /* number of iterators in use */
3052 int next_iter
; /* next iterator to use */
3053 unsigned int save_gr_used_mask
;
3056 static struct spill_fill_data spill_fill_data
;
3059 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
3063 spill_fill_data
.init_after
= get_last_insn ();
3064 spill_fill_data
.init_reg
[0] = init_reg
;
3065 spill_fill_data
.init_reg
[1] = init_reg
;
3066 spill_fill_data
.prev_addr
[0] = NULL
;
3067 spill_fill_data
.prev_addr
[1] = NULL
;
3068 spill_fill_data
.prev_insn
[0] = NULL
;
3069 spill_fill_data
.prev_insn
[1] = NULL
;
3070 spill_fill_data
.prev_off
[0] = cfa_off
;
3071 spill_fill_data
.prev_off
[1] = cfa_off
;
3072 spill_fill_data
.next_iter
= 0;
3073 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
3075 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
3076 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
3078 int regno
= next_scratch_gr_reg ();
3079 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
3080 current_frame_info
.gr_used_mask
|= 1 << regno
;
3085 finish_spill_pointers (void)
3087 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
3091 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
3093 int iter
= spill_fill_data
.next_iter
;
3094 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
3095 rtx disp_rtx
= GEN_INT (disp
);
3098 if (spill_fill_data
.prev_addr
[iter
])
3100 if (satisfies_constraint_N (disp_rtx
))
3102 *spill_fill_data
.prev_addr
[iter
]
3103 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
3104 gen_rtx_PLUS (DImode
,
3105 spill_fill_data
.iter_reg
[iter
],
3107 add_reg_note (spill_fill_data
.prev_insn
[iter
],
3108 REG_INC
, spill_fill_data
.iter_reg
[iter
]);
3112 /* ??? Could use register post_modify for loads. */
3113 if (!satisfies_constraint_I (disp_rtx
))
3115 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3116 emit_move_insn (tmp
, disp_rtx
);
3119 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3120 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
3123 /* Micro-optimization: if we've created a frame pointer, it's at
3124 CFA 0, which may allow the real iterator to be initialized lower,
3125 slightly increasing parallelism. Also, if there are few saves
3126 it may eliminate the iterator entirely. */
3128 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
3129 && frame_pointer_needed
)
3131 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
3132 set_mem_alias_set (mem
, get_varargs_alias_set ());
3141 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
3142 spill_fill_data
.init_reg
[iter
]);
3147 if (!satisfies_constraint_I (disp_rtx
))
3149 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3150 emit_move_insn (tmp
, disp_rtx
);
3154 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3155 spill_fill_data
.init_reg
[iter
],
3162 /* Careful for being the first insn in a sequence. */
3163 if (spill_fill_data
.init_after
)
3164 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
3167 rtx_insn
*first
= get_insns ();
3169 insn
= emit_insn_before (seq
, first
);
3171 insn
= emit_insn (seq
);
3173 spill_fill_data
.init_after
= insn
;
3176 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
3178 /* ??? Not all of the spills are for varargs, but some of them are.
3179 The rest of the spills belong in an alias set of their own. But
3180 it doesn't actually hurt to include them here. */
3181 set_mem_alias_set (mem
, get_varargs_alias_set ());
3183 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
3184 spill_fill_data
.prev_off
[iter
] = cfa_off
;
3186 if (++iter
>= spill_fill_data
.n_iter
)
3188 spill_fill_data
.next_iter
= iter
;
3194 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
3197 int iter
= spill_fill_data
.next_iter
;
3201 mem
= spill_restore_mem (reg
, cfa_off
);
3202 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
3203 spill_fill_data
.prev_insn
[iter
] = insn
;
3210 RTX_FRAME_RELATED_P (insn
) = 1;
3212 /* Don't even pretend that the unwind code can intuit its way
3213 through a pair of interleaved post_modify iterators. Just
3214 provide the correct answer. */
3216 if (frame_pointer_needed
)
3218 base
= hard_frame_pointer_rtx
;
3223 base
= stack_pointer_rtx
;
3224 off
= current_frame_info
.total_size
- cfa_off
;
3227 add_reg_note (insn
, REG_CFA_OFFSET
,
3228 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg
),
3229 plus_constant (Pmode
,
3236 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
3238 int iter
= spill_fill_data
.next_iter
;
3241 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
3242 GEN_INT (cfa_off
)));
3243 spill_fill_data
.prev_insn
[iter
] = insn
;
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
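/* The backing store needs 8 bytes per stacked register, plus roughly one
   extra 8-byte slot for each group of 63 registers (the RSE interleaves a
   NaT collection word), plus one more slot to cover a partial group.  */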
3273 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3274 inclusive. These are offsets from the current stack pointer. BS_SIZE
3275 is the size of the backing store. ??? This clobbers r2 and r3. */
3278 ia64_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
3281 rtx r2
= gen_rtx_REG (Pmode
, GR_REG (2));
3282 rtx r3
= gen_rtx_REG (Pmode
, GR_REG (3));
3283 rtx p6
= gen_rtx_REG (BImode
, PR_REG (6));
3285 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3286 of the Register Stack Engine. We also need to probe it after checking
3287 that the 2 stacks don't overlap. */
3288 emit_insn (gen_bsp_value (r3
));
3289 emit_move_insn (r2
, GEN_INT (-(first
+ size
)));
3291 /* Compare current value of BSP and SP registers. */
3292 emit_insn (gen_rtx_SET (p6
, gen_rtx_fmt_ee (LTU
, BImode
,
3293 r3
, stack_pointer_rtx
)));
3295 /* Compute the address of the probe for the Backing Store (which grows
3296 towards higher addresses). We probe only at the first offset of
3297 the next page because some OS (eg Linux/ia64) only extend the
3298 backing store when this specific address is hit (but generate a SEGV
3299 on other address). Page size is the worst case (4KB). The reserve
3300 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3301 Also compute the address of the last probe for the memory stack
3302 (which grows towards lower addresses). */
3303 emit_insn (gen_rtx_SET (r3
, plus_constant (Pmode
, r3
, 4095)));
3304 emit_insn (gen_rtx_SET (r2
, gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3306 /* Compare them and raise SEGV if the former has topped the latter. */
3307 emit_insn (gen_rtx_COND_EXEC (VOIDmode
,
3308 gen_rtx_fmt_ee (NE
, VOIDmode
, p6
, const0_rtx
),
3309 gen_rtx_SET (p6
, gen_rtx_fmt_ee (GEU
, BImode
,
3311 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode
, r3
, GEN_INT (12),
3314 emit_insn (gen_rtx_COND_EXEC (VOIDmode
,
3315 gen_rtx_fmt_ee (NE
, VOIDmode
, p6
, const0_rtx
),
3316 gen_rtx_TRAP_IF (VOIDmode
, const1_rtx
,
3319 /* Probe the Backing Store if necessary. */
3321 emit_stack_probe (r3
);
3323 /* Probe the memory stack if necessary. */
3327 /* See if we have a constant small number of probes to generate. If so,
3328 that's the easy case. */
3329 else if (size
<= PROBE_INTERVAL
)
3330 emit_stack_probe (r2
);
3332 /* The run-time loop is made up of 9 insns in the generic case while this
3333 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3334 else if (size
<= 4 * PROBE_INTERVAL
)
3338 emit_move_insn (r2
, GEN_INT (-(first
+ PROBE_INTERVAL
)));
3339 emit_insn (gen_rtx_SET (r2
,
3340 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3341 emit_stack_probe (r2
);
3343 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3344 it exceeds SIZE. If only two probes are needed, this will not
3345 generate any code. Then probe at FIRST + SIZE. */
3346 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
3348 emit_insn (gen_rtx_SET (r2
,
3349 plus_constant (Pmode
, r2
, -PROBE_INTERVAL
)));
3350 emit_stack_probe (r2
);
3353 emit_insn (gen_rtx_SET (r2
,
3354 plus_constant (Pmode
, r2
,
3355 (i
- PROBE_INTERVAL
) - size
)));
3356 emit_stack_probe (r2
);
3359 /* Otherwise, do the same as above, but in a loop. Note that we must be
3360 extra careful with variables wrapping around because we might be at
3361 the very top (or the very bottom) of the address space and we have
3362 to be able to handle this case properly; in particular, we use an
3363 equality test for the loop condition. */
3366 HOST_WIDE_INT rounded_size
;
3368 emit_move_insn (r2
, GEN_INT (-first
));
3371 /* Step 1: round SIZE to the previous multiple of the interval. */
3373 rounded_size
= size
& -PROBE_INTERVAL
;
3376 /* Step 2: compute initial and final value of the loop counter. */
3378 /* TEST_ADDR = SP + FIRST. */
3379 emit_insn (gen_rtx_SET (r2
,
3380 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3382 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3383 if (rounded_size
> (1 << 21))
3385 emit_move_insn (r3
, GEN_INT (-rounded_size
));
3386 emit_insn (gen_rtx_SET (r3
, gen_rtx_PLUS (Pmode
, r2
, r3
)));
3389 emit_insn (gen_rtx_SET (r3
, gen_rtx_PLUS (Pmode
, r2
,
3390 GEN_INT (-rounded_size
))));
3397 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3400 while (TEST_ADDR != LAST_ADDR)
3402 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3403 until it is equal to ROUNDED_SIZE. */
3405 emit_insn (gen_probe_stack_range (r2
, r2
, r3
));
3408 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3409 that SIZE is equal to ROUNDED_SIZE. */
3411 /* TEMP = SIZE - ROUNDED_SIZE. */
3412 if (size
!= rounded_size
)
3414 emit_insn (gen_rtx_SET (r2
, plus_constant (Pmode
, r2
,
3415 rounded_size
- size
)));
3416 emit_stack_probe (r2
);
3420 /* Make sure nothing is scheduled before we are done. */
3421 emit_insn (gen_blockage ());
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);

  /* Branch.  */
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
3461 /* Called after register allocation to add any instructions needed for the
3462 prologue. Using a prologue insn is favored compared to putting all of the
3463 instructions in output_function_prologue(), since it allows the scheduler
3464 to intermix instructions with the saves of the caller saved registers. In
3465 some cases, it might be necessary to emit a barrier instruction as the last
3466 insn to prevent such scheduling.
3468 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3469 so that the debug info generation code can handle them properly.
3471 The register save area is laid out like so:
3473 [ varargs spill area ]
3474 [ fr register spill area ]
3475 [ br register spill area ]
3476 [ ar register spill area ]
3477 [ pr register spill area ]
3478 [ gr register spill area ] */
3480 /* ??? Get inefficient code when the frame size is larger than can fit in an
3481 adds instruction. */
3484 ia64_expand_prologue (void)
3487 rtx ar_pfs_save_reg
, ar_unat_save_reg
;
3488 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
3491 ia64_compute_frame_size (get_frame_size ());
3492 last_scratch_gr_reg
= 15;
3494 if (flag_stack_usage_info
)
3495 current_function_static_stack_size
= current_frame_info
.total_size
;
3497 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
3499 HOST_WIDE_INT size
= current_frame_info
.total_size
;
3500 int bs_size
= BACKING_STORE_SIZE (current_frame_info
.n_input_regs
3501 + current_frame_info
.n_local_regs
);
3503 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
3505 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
3506 ia64_emit_probe_stack_range (get_stack_check_protect (),
3507 size
- get_stack_check_protect (),
3509 else if (size
+ bs_size
> get_stack_check_protect ())
3510 ia64_emit_probe_stack_range (get_stack_check_protect (),
3513 else if (size
+ bs_size
> 0)
3514 ia64_emit_probe_stack_range (get_stack_check_protect (), size
, bs_size
);
3519 fprintf (dump_file
, "ia64 frame related registers "
3520 "recorded in current_frame_info.r[]:\n");
3521 #define PRINTREG(a) if (current_frame_info.r[a]) \
3522 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3524 PRINTREG(reg_save_b0
);
3525 PRINTREG(reg_save_pr
);
3526 PRINTREG(reg_save_ar_pfs
);
3527 PRINTREG(reg_save_ar_unat
);
3528 PRINTREG(reg_save_ar_lc
);
3529 PRINTREG(reg_save_gp
);
3533 /* If there is no epilogue, then we don't need some prologue insns.
3534 We need to avoid emitting the dead prologue insns, because flow
3535 will complain about them. */
3541 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
3542 if ((e
->flags
& EDGE_FAKE
) == 0
3543 && (e
->flags
& EDGE_FALLTHRU
) != 0)
3545 epilogue_p
= (e
!= NULL
);
3550 /* Set the local, input, and output register names. We need to do this
3551 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3552 half. If we use in/loc/out register names, then we get assembler errors
3553 in crtn.S because there is no alloc insn or regstk directive in there. */
3554 if (! TARGET_REG_NAMES
)
3556 int inputs
= current_frame_info
.n_input_regs
;
3557 int locals
= current_frame_info
.n_local_regs
;
3558 int outputs
= current_frame_info
.n_output_regs
;
3560 for (i
= 0; i
< inputs
; i
++)
3561 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
3562 for (i
= 0; i
< locals
; i
++)
3563 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
3564 for (i
= 0; i
< outputs
; i
++)
3565 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
3568 /* Set the frame pointer register name. The regnum is logically loc79,
3569 but of course we'll not have allocated that many locals. Rather than
3570 worrying about renumbering the existing rtxs, we adjust the name. */
3571 /* ??? This code means that we can never use one local register when
3572 there is a frame pointer. loc79 gets wasted in this case, as it is
3573 renamed to a register that will never be used. See also the try_locals
3574 code in find_gr_spill. */
3575 if (current_frame_info
.r
[reg_fp
])
3577 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3578 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3579 = reg_names
[current_frame_info
.r
[reg_fp
]];
3580 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3583 /* We don't need an alloc instruction if we've used no outputs or locals. */
3584 if (current_frame_info
.n_local_regs
== 0
3585 && current_frame_info
.n_output_regs
== 0
3586 && current_frame_info
.n_input_regs
<= crtl
->args
.info
.int_regs
3587 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3589 /* If there is no alloc, but there are input registers used, then we
3590 need a .regstk directive. */
3591 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
3592 ar_pfs_save_reg
= NULL_RTX
;
3596 current_frame_info
.need_regstk
= 0;
3598 if (current_frame_info
.r
[reg_save_ar_pfs
])
3600 regno
= current_frame_info
.r
[reg_save_ar_pfs
];
3601 reg_emitted (reg_save_ar_pfs
);
3604 regno
= next_scratch_gr_reg ();
3605 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
3607 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
3608 GEN_INT (current_frame_info
.n_input_regs
),
3609 GEN_INT (current_frame_info
.n_local_regs
),
3610 GEN_INT (current_frame_info
.n_output_regs
),
3611 GEN_INT (current_frame_info
.n_rotate_regs
)));
3612 if (current_frame_info
.r
[reg_save_ar_pfs
])
3614 RTX_FRAME_RELATED_P (insn
) = 1;
3615 add_reg_note (insn
, REG_CFA_REGISTER
,
3616 gen_rtx_SET (ar_pfs_save_reg
,
3617 gen_rtx_REG (DImode
, AR_PFS_REGNUM
)));
3621 /* Set up frame pointer, stack pointer, and spill iterators. */
3623 n_varargs
= cfun
->machine
->n_varargs
;
3624 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
3625 stack_pointer_rtx
, 0);
3627 if (frame_pointer_needed
)
3629 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3630 RTX_FRAME_RELATED_P (insn
) = 1;
3632 /* Force the unwind info to recognize this as defining a new CFA,
3633 rather than some temp register setup. */
3634 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL_RTX
);
3637 if (current_frame_info
.total_size
!= 0)
3639 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
3642 if (satisfies_constraint_I (frame_size_rtx
))
3643 offset
= frame_size_rtx
;
3646 regno
= next_scratch_gr_reg ();
3647 offset
= gen_rtx_REG (DImode
, regno
);
3648 emit_move_insn (offset
, frame_size_rtx
);
3651 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
3652 stack_pointer_rtx
, offset
));
3654 if (! frame_pointer_needed
)
3656 RTX_FRAME_RELATED_P (insn
) = 1;
3657 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
3658 gen_rtx_SET (stack_pointer_rtx
,
3659 gen_rtx_PLUS (DImode
,
3664 /* ??? At this point we must generate a magic insn that appears to
3665 modify the stack pointer, the frame pointer, and all spill
3666 iterators. This would allow the most scheduling freedom. For
3667 now, just hard stop. */
3668 emit_insn (gen_blockage ());
3671 /* Must copy out ar.unat before doing any integer spills. */
3672 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3674 if (current_frame_info
.r
[reg_save_ar_unat
])
3677 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3678 reg_emitted (reg_save_ar_unat
);
3682 alt_regno
= next_scratch_gr_reg ();
3683 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3684 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3687 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3688 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
3689 if (current_frame_info
.r
[reg_save_ar_unat
])
3691 RTX_FRAME_RELATED_P (insn
) = 1;
3692 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3695 /* Even if we're not going to generate an epilogue, we still
3696 need to save the register so that EH works. */
3697 if (! epilogue_p
&& current_frame_info
.r
[reg_save_ar_unat
])
3698 emit_insn (gen_prologue_use (ar_unat_save_reg
));
3701 ar_unat_save_reg
= NULL_RTX
;
3703 /* Spill all varargs registers. Do this before spilling any GR registers,
3704 since we want the UNAT bits for the GR registers to override the UNAT
3705 bits from varargs, which we don't care about. */
3708 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
3710 reg
= gen_rtx_REG (DImode
, regno
);
3711 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
3714 /* Locate the bottom of the register save area. */
3715 cfa_off
= (current_frame_info
.spill_cfa_off
3716 + current_frame_info
.spill_size
3717 + current_frame_info
.extra_spill_size
);
3719 /* Save the predicate register block either in a register or in memory. */
3720 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3722 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3723 if (current_frame_info
.r
[reg_save_pr
] != 0)
3725 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3726 reg_emitted (reg_save_pr
);
3727 insn
= emit_move_insn (alt_reg
, reg
);
3729 /* ??? Denote pr spill/fill by a DImode move that modifies all
3730 64 hard registers. */
3731 RTX_FRAME_RELATED_P (insn
) = 1;
3732 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3734 /* Even if we're not going to generate an epilogue, we still
3735 need to save the register so that EH works. */
3737 emit_insn (gen_prologue_use (alt_reg
));
3741 alt_regno
= next_scratch_gr_reg ();
3742 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3743 insn
= emit_move_insn (alt_reg
, reg
);
3744 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3749 /* Handle AR regs in numerical order. All of them get special handling. */
3750 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
3751 && current_frame_info
.r
[reg_save_ar_unat
] == 0)
3753 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3754 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
3758 /* The alloc insn already copied ar.pfs into a general register. The
3759 only thing we have to do now is copy that register to a stack slot
3760 if we'd not allocated a local register for the job. */
3761 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
3762 && current_frame_info
.r
[reg_save_ar_pfs
] == 0)
3764 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3765 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
3769 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3771 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3772 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3774 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3775 reg_emitted (reg_save_ar_lc
);
3776 insn
= emit_move_insn (alt_reg
, reg
);
3777 RTX_FRAME_RELATED_P (insn
) = 1;
3778 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3780 /* Even if we're not going to generate an epilogue, we still
3781 need to save the register so that EH works. */
3783 emit_insn (gen_prologue_use (alt_reg
));
3787 alt_regno
= next_scratch_gr_reg ();
3788 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3789 emit_move_insn (alt_reg
, reg
);
3790 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3795 /* Save the return pointer. */
3796 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3798 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3799 if (current_frame_info
.r
[reg_save_b0
] != 0)
3801 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3802 reg_emitted (reg_save_b0
);
3803 insn
= emit_move_insn (alt_reg
, reg
);
3804 RTX_FRAME_RELATED_P (insn
) = 1;
3805 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (alt_reg
, pc_rtx
));
3807 /* Even if we're not going to generate an epilogue, we still
3808 need to save the register so that EH works. */
3810 emit_insn (gen_prologue_use (alt_reg
));
3814 alt_regno
= next_scratch_gr_reg ();
3815 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3816 emit_move_insn (alt_reg
, reg
);
3817 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3822 if (current_frame_info
.r
[reg_save_gp
])
3824 reg_emitted (reg_save_gp
);
3825 insn
= emit_move_insn (gen_rtx_REG (DImode
,
3826 current_frame_info
.r
[reg_save_gp
]),
3827 pic_offset_table_rtx
);
3830 /* We should now be at the base of the gr/br/fr spill area. */
3831 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3832 + current_frame_info
.spill_size
));
3834 /* Spill all general registers. */
3835 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3836 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3838 reg
= gen_rtx_REG (DImode
, regno
);
3839 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
3843 /* Spill the rest of the BR registers. */
3844 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3845 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3847 alt_regno
= next_scratch_gr_reg ();
3848 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3849 reg
= gen_rtx_REG (DImode
, regno
);
3850 emit_move_insn (alt_reg
, reg
);
3851 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3855 /* Align the frame and spill all FR registers. */
3856 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3857 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3859 gcc_assert (!(cfa_off
& 15));
3860 reg
= gen_rtx_REG (XFmode
, regno
);
3861 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
3865 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3867 finish_spill_pointers ();
/* Output the textual info surrounding the prologue.  */

void
ia64_start_function (FILE *file, const char *fnname,
                     tree decl ATTRIBUTE_UNUSED)
{
#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  fputs ("\t.proc ", file);
  assemble_name (file, fnname);
  fputc ('\n', file);
  ASM_OUTPUT_LABEL (file, fnname);
}
3886 /* Called after register allocation to add any instructions needed for the
3887 epilogue. Using an epilogue insn is favored compared to putting all of the
3888 instructions in output_function_prologue(), since it allows the scheduler
3889 to intermix instructions with the saves of the caller saved registers. In
3890 some cases, it might be necessary to emit a barrier instruction as the last
3891 insn to prevent such scheduling. */
3894 ia64_expand_epilogue (int sibcall_p
)
3897 rtx reg
, alt_reg
, ar_unat_save_reg
;
3898 int regno
, alt_regno
, cfa_off
;
3900 ia64_compute_frame_size (get_frame_size ());
3902 /* If there is a frame pointer, then we use it instead of the stack
3903 pointer, so that the stack pointer does not need to be valid when
3904 the epilogue starts. See EXIT_IGNORE_STACK. */
3905 if (frame_pointer_needed
)
3906 setup_spill_pointers (current_frame_info
.n_spilled
,
3907 hard_frame_pointer_rtx
, 0);
3909 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3910 current_frame_info
.total_size
);
3912 if (current_frame_info
.total_size
!= 0)
3914 /* ??? At this point we must generate a magic insn that appears to
3915 modify the spill iterators and the frame pointer. This would
3916 allow the most scheduling freedom. For now, just hard stop. */
3917 emit_insn (gen_blockage ());
3920 /* Locate the bottom of the register save area. */
3921 cfa_off
= (current_frame_info
.spill_cfa_off
3922 + current_frame_info
.spill_size
3923 + current_frame_info
.extra_spill_size
);
3925 /* Restore the predicate registers. */
3926 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3928 if (current_frame_info
.r
[reg_save_pr
] != 0)
3930 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3931 reg_emitted (reg_save_pr
);
3935 alt_regno
= next_scratch_gr_reg ();
3936 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3937 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3940 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3941 emit_move_insn (reg
, alt_reg
);
3944 /* Restore the application registers. */
3946 /* Load the saved unat from the stack, but do not restore it until
3947 after the GRs have been restored. */
3948 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3950 if (current_frame_info.r[reg_save_ar_unat] != 0)
3953 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3954 reg_emitted (reg_save_ar_unat);
3958 alt_regno = next_scratch_gr_reg ();
3959 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3960 current_frame_info.gr_used_mask |= 1 << alt_regno;
3961 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3966 ar_unat_save_reg = NULL_RTX;
3968 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3970 reg_emitted (reg_save_ar_pfs);
3971 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3972 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3973 emit_move_insn (reg, alt_reg);
3975 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3977 alt_regno = next_scratch_gr_reg ();
3978 alt_reg = gen_rtx_REG (DImode, alt_regno);
3979 do_restore (gen_movdi_x, alt_reg, cfa_off);
3981 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3982 emit_move_insn (reg, alt_reg);
3985 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3987 if (current_frame_info.r[reg_save_ar_lc] != 0)
3989 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3990 reg_emitted (reg_save_ar_lc);
3994 alt_regno = next_scratch_gr_reg ();
3995 alt_reg = gen_rtx_REG (DImode, alt_regno);
3996 do_restore (gen_movdi_x, alt_reg, cfa_off);
3999 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4000 emit_move_insn (reg, alt_reg);
4003 /* Restore the return pointer. */
4004 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4006 if (current_frame_info.r[reg_save_b0] != 0)
4008 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4009 reg_emitted (reg_save_b0);
4013 alt_regno = next_scratch_gr_reg ();
4014 alt_reg = gen_rtx_REG (DImode, alt_regno);
4015 do_restore (gen_movdi_x, alt_reg, cfa_off);
4018 reg = gen_rtx_REG (DImode, BR_REG (0));
4019 emit_move_insn (reg, alt_reg);
4022 /* We should now be at the base of the gr/br/fr spill area. */
4023 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4024 + current_frame_info.spill_size));
4026 /* The GP may be stored on the stack in the prologue, but it's
4027 never restored in the epilogue. Skip the stack slot. */
4028 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4031 /* Restore all general registers. */
4032 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4033 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4035 reg = gen_rtx_REG (DImode, regno);
4036 do_restore (gen_gr_restore, reg, cfa_off);
4040 /* Restore the branch registers. */
4041 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4042 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4044 alt_regno = next_scratch_gr_reg ();
4045 alt_reg = gen_rtx_REG (DImode, alt_regno);
4046 do_restore (gen_movdi_x, alt_reg, cfa_off);
4048 reg = gen_rtx_REG (DImode, regno);
4049 emit_move_insn (reg, alt_reg);
4052 /* Restore floating point registers. */
4053 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4054 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4056 gcc_assert (!(cfa_off & 15));
4057 reg = gen_rtx_REG (XFmode, regno);
4058 do_restore (gen_fr_restore_x, reg, cfa_off);
4062 /* Restore ar.unat for real. */
4063 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4065 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4066 emit_move_insn (reg, ar_unat_save_reg);
4069 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4071 finish_spill_pointers ();
4073 if (current_frame_info.total_size
4074 || cfun->machine->ia64_eh_epilogue_sp
4075 || frame_pointer_needed)
4077 /* ??? At this point we must generate a magic insn that appears to
4078 modify the spill iterators, the stack pointer, and the frame
4079 pointer. This would allow the most scheduling freedom. For now,
4081 emit_insn (gen_blockage ());
4084 if (cfun->machine->ia64_eh_epilogue_sp)
4085 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4086 else if (frame_pointer_needed)
4088 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4089 RTX_FRAME_RELATED_P (insn) = 1;
4090 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4092 else if (current_frame_info.total_size)
4094 rtx offset, frame_size_rtx;
4096 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4097 if (satisfies_constraint_I (frame_size_rtx))
4098 offset = frame_size_rtx;
4101 regno = next_scratch_gr_reg ();
4102 offset = gen_rtx_REG (DImode, regno);
4103 emit_move_insn (offset, frame_size_rtx);
4106 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4109 RTX_FRAME_RELATED_P (insn) = 1;
4110 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4111 gen_rtx_SET (stack_pointer_rtx,
4112 gen_rtx_PLUS (DImode,
4117 if (cfun->machine->ia64_eh_epilogue_bsp)
4118 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4121 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4124 int fp = GR_REG (2);
4125 /* We need a throw away register here, r0 and r1 are reserved,
4126 so r2 is the first available call clobbered register. If
4127 there was a frame_pointer register, we may have swapped the
4128 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4129 sure we're using the string "r2" when emitting the register
4130 name for the assembler. */
4131 if (current_frame_info.r[reg_fp]
4132 && current_frame_info.r[reg_fp] == GR_REG (2))
4133 fp = HARD_FRAME_POINTER_REGNUM;
4135 /* We must emit an alloc to force the input registers to become output
4136 registers. Otherwise, if the callee tries to pass its parameters
4137 through to another call without an intervening alloc, then these
4139 /* ??? We don't need to preserve all input registers. We only need to
4140 preserve those input registers used as arguments to the sibling call.
4141 It is unclear how to compute that number here. */
4142 if (current_frame_info.n_input_regs != 0)
4144 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4146 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4147 const0_rtx, const0_rtx,
4148 n_inputs, const0_rtx));
4149 RTX_FRAME_RELATED_P (insn) = 1;
4151 /* ??? We need to mark the alloc as frame-related so that it gets
4152 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4153 But there's nothing dwarf2 related to be done wrt the register
4154 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4155 the empty parallel means dwarf2out will not see anything. */
4156 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4157 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4162 /* Return 1 if br.ret can do all the work required to return from a
4166 ia64_direct_return (void)
4168 if (reload_completed && ! frame_pointer_needed)
4170 ia64_compute_frame_size (get_frame_size ());
4172 return (current_frame_info.total_size == 0
4173 && current_frame_info.n_spilled == 0
4174 && current_frame_info.r[reg_save_b0] == 0
4175 && current_frame_info.r[reg_save_pr] == 0
4176 && current_frame_info.r[reg_save_ar_pfs] == 0
4177 && current_frame_info.r[reg_save_ar_unat] == 0
4178 && current_frame_info.r[reg_save_ar_lc] == 0);
4183 /* Return the magic cookie that we use to hold the return address
4184 during early compilation. */
4187 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4191 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4194 /* Split this value after reload, now that we know where the return
4195 address is saved. */
4198 ia64_split_return_addr_rtx (rtx dest)
4202 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4204 if (current_frame_info.r[reg_save_b0] != 0)
4206 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4207 reg_emitted (reg_save_b0);
4215 /* Compute offset from CFA for BR0. */
4216 /* ??? Must be kept in sync with ia64_expand_prologue. */
4217 off = (current_frame_info.spill_cfa_off
4218 + current_frame_info.spill_size);
4219 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4220 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4223 /* Convert CFA offset to a register based offset. */
4224 if (frame_pointer_needed)
4225 src = hard_frame_pointer_rtx;
4228 src = stack_pointer_rtx;
4229 off += current_frame_info.total_size;
4232 /* Load address into scratch register. */
4233 off_r = GEN_INT (off);
4234 if (satisfies_constraint_I (off_r))
4235 emit_insn (gen_adddi3 (dest, src, off_r));
4238 emit_move_insn (dest, off_r);
4239 emit_insn (gen_adddi3 (dest, src, dest));
4242 src = gen_rtx_MEM (Pmode, dest);
4246 src = gen_rtx_REG (DImode, BR_REG (0));
4248 emit_move_insn (dest, src);
4252 ia64_hard_regno_rename_ok (int from, int to)
4254 /* Don't clobber any of the registers we reserved for the prologue. */
4257 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4258 if (to == current_frame_info.r[r]
4259 || from == current_frame_info.r[r]
4260 || to == emitted_frame_related_regs[r]
4261 || from == emitted_frame_related_regs[r])
4264 /* Don't use output registers outside the register frame. */
4265 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4268 /* Retain even/oddness on predicate register pairs. */
4269 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4270 return (from & 1) == (to & 1);
4275 /* Implement TARGET_HARD_REGNO_NREGS.
4277 ??? We say that BImode PR values require two registers. This allows us to
4278 easily store the normal and inverted values. We use CCImode to indicate
4279 a single predicate register. */
4282 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4284 if (regno == PR_REG (0) && mode == DImode)
4286 if (PR_REGNO_P (regno) && (mode) == BImode)
4288 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4290 if (FR_REGNO_P (regno) && mode == XFmode)
4292 if (FR_REGNO_P (regno) && mode == RFmode)
4294 if (FR_REGNO_P (regno) && mode == XCmode)
4296 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4299 /* Implement TARGET_HARD_REGNO_MODE_OK. */
4302 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4304 if (FR_REGNO_P (regno))
4305 return (GET_MODE_CLASS (mode) != MODE_CC
4309 if (PR_REGNO_P (regno))
4310 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4312 if (GR_REGNO_P (regno))
4313 return mode != XFmode && mode != XCmode && mode != RFmode;
4315 if (AR_REGNO_P (regno))
4316 return mode == DImode;
4318 if (BR_REGNO_P (regno))
4319 return mode == DImode;
4324 /* Implement TARGET_MODES_TIEABLE_P.
4326 Don't tie integer and FP modes, as that causes us to get integer registers
4327 allocated for FP instructions. XFmode only supported in FP registers so
4328 we can't tie it with any other modes. */
4331 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4333 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4334 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4335 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4336 && (mode1 == BImode) == (mode2 == BImode));
4339 /* Target hook for assembling integer objects. Handle word-sized
4340 aligned objects and detect the cases when @fptr is needed. */
4343 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4345 if (size == POINTER_SIZE / BITS_PER_UNIT
4346 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4347 && GET_CODE (x) == SYMBOL_REF
4348 && SYMBOL_REF_FUNCTION_P (x))
4350 static const char * const directive[2][2] = {
4351 /* 64-bit pointer */ /* 32-bit pointer */
4352 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4353 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4355 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4356 output_addr_const (asm_out_file, x);
4357 fputs (")\n", asm_out_file);
4360 return default_assemble_integer (x, size, aligned_p);
4363 /* Emit the function prologue. */
4366 ia64_output_function_prologue (FILE *file)
4368 int mask, grsave, grsave_prev;
4370 if (current_frame_info.need_regstk)
4371 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4372 current_frame_info.n_input_regs,
4373 current_frame_info.n_local_regs,
4374 current_frame_info.n_output_regs,
4375 current_frame_info.n_rotate_regs);
4377 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4380 /* Emit the .prologue directive. */
4383 grsave = grsave_prev = 0;
4384 if (current_frame_info.r[reg_save_b0] != 0)
4387 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4389 if (current_frame_info.r[reg_save_ar_pfs] != 0
4390 && (grsave_prev == 0
4391 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4394 if (grsave_prev == 0)
4395 grsave = current_frame_info.r[reg_save_ar_pfs];
4396 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4398 if (current_frame_info.r[reg_fp] != 0
4399 && (grsave_prev == 0
4400 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4403 if (grsave_prev == 0)
4404 grsave = HARD_FRAME_POINTER_REGNUM;
4405 grsave_prev = current_frame_info.r[reg_fp];
4407 if (current_frame_info.r[reg_save_pr] != 0
4408 && (grsave_prev == 0
4409 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4412 if (grsave_prev == 0)
4413 grsave = current_frame_info.r[reg_save_pr];
4416 if (mask && TARGET_GNU_AS)
4417 fprintf (file, "\t.prologue %d, %d\n", mask,
4418 ia64_dbx_register_number (grsave));
4420 fputs ("\t.prologue\n", file);
4422 /* Emit a .spill directive, if necessary, to relocate the base of
4423 the register spill area. */
4424 if (current_frame_info.spill_cfa_off != -16)
4425 fprintf (file, "\t.spill %ld\n",
4426 (long) (current_frame_info.spill_cfa_off
4427 + current_frame_info.spill_size));
4430 /* Emit the .body directive at the scheduled end of the prologue. */
4433 ia64_output_function_end_prologue (FILE *file)
4435 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4438 fputs ("\t.body\n", file);
4441 /* Emit the function epilogue. */
4444 ia64_output_function_epilogue (FILE *)
4448 if (current_frame_info.r[reg_fp])
4450 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4451 reg_names[HARD_FRAME_POINTER_REGNUM]
4452 = reg_names[current_frame_info.r[reg_fp]];
4453 reg_names[current_frame_info.r[reg_fp]] = tmp;
4454 reg_emitted (reg_fp);
4456 if (! TARGET_REG_NAMES)
4458 for (i = 0; i < current_frame_info.n_input_regs; i++)
4459 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4460 for (i = 0; i < current_frame_info.n_local_regs; i++)
4461 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4462 for (i = 0; i < current_frame_info.n_output_regs; i++)
4463 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4466 current_frame_info.initialized = 0;
4470 ia64_dbx_register_number (int regno)
4472 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4473 from its home at loc79 to something inside the register frame. We
4474 must perform the same renumbering here for the debug info. */
4475 if (current_frame_info.r[reg_fp])
4477 if (regno == HARD_FRAME_POINTER_REGNUM)
4478 regno = current_frame_info.r[reg_fp];
4479 else if (regno == current_frame_info.r[reg_fp])
4480 regno = HARD_FRAME_POINTER_REGNUM;
4483 if (IN_REGNO_P (regno))
4484 return 32 + regno - IN_REG (0);
4485 else if (LOC_REGNO_P (regno))
4486 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4487 else if (OUT_REGNO_P (regno))
4488 return (32 + current_frame_info.n_input_regs
4489 + current_frame_info.n_local_regs + regno - OUT_REG (0));
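/* As a worked example of the mapping above (the register counts are
   illustrative values, not taken from any particular function): with
   current_frame_info.n_input_regs == 2 and n_local_regs == 3, in0 and
   in1 map to debug register numbers 32 and 33, loc0 maps to 34
   (32 + 2), and out0 maps to 37 (32 + 2 + 3).  */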
4494 /* Implement TARGET_TRAMPOLINE_INIT.
4496 The trampoline should set the static chain pointer to value placed
4497 into the trampoline and should branch to the specified routine.
4498 To make the normal indirect-subroutine calling convention work,
4499 the trampoline must look like a function descriptor; the first
4500 word being the target address and the second being the target's
4503 We abuse the concept of a global pointer by arranging for it
4504 to point to the data we need to load. The complete trampoline
4505 has the following form:
4507 +-------------------+ \
4508 TRAMP: | __ia64_trampoline | |
4509 +-------------------+ > fake function descriptor
4511 +-------------------+ /
4512 | target descriptor |
4513 +-------------------+
4515 +-------------------+
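/* A sketch of the resulting four double-word trampoline, mirroring the
   emit_move_insn sequence in ia64_trampoline_init below (the word
   offsets are inferred from the ADDR+16 and +8 adjustments, not from
   additional documentation):
     word 0: address of __ia64_trampoline   -- fake descriptor, code
     word 1: ADDR + 16                      -- fake descriptor, "gp"
     word 2: target function descriptor (fnaddr)
     word 3: static chain value  */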
4519 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4521 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4522 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4524 /* The Intel assembler requires that the global __ia64_trampoline symbol
4525 be declared explicitly */
4528 static bool declared_ia64_trampoline = false;
4530 if (!declared_ia64_trampoline)
4532 declared_ia64_trampoline = true;
4533 (*targetm.asm_out.globalize_label) (asm_out_file,
4534 "__ia64_trampoline");
4538 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4539 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4540 fnaddr = convert_memory_address (Pmode, fnaddr);
4541 static_chain = convert_memory_address (Pmode, static_chain);
4543 /* Load up our iterator. */
4544 addr_reg = copy_to_reg (addr);
4545 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4547 /* The first two words are the fake descriptor:
4548 __ia64_trampoline, ADDR+16. */
4549 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4550 if (TARGET_ABI_OPEN_VMS)
4552 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4553 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4554 relocation against function symbols to make it identical to the
4555 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4556 strict ELF and dereference to get the bare code address. */
4557 rtx reg = gen_reg_rtx (Pmode);
4558 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4559 emit_move_insn (reg, tramp);
4560 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4563 emit_move_insn (m_tramp, tramp);
4564 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4565 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4567 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4568 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4569 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4571 /* The third word is the target descriptor. */
4572 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4573 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4574 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4576 /* The fourth word is the static chain. */
4577 emit_move_insn (m_tramp, static_chain);
4580 /* Do any needed setup for a variadic function. CUM has not been updated
4581 for the last named argument which has type TYPE and mode MODE.
4583 We generate the actual spill instructions during prologue generation. */
4586 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4587 tree type, int * pretend_size,
4588 int second_time ATTRIBUTE_UNUSED)
4590 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4592 /* Skip the current argument. */
4593 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4595 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4597 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4598 *pretend_size = n * UNITS_PER_WORD;
4599 cfun->machine->n_varargs = n;
4603 /* Check whether TYPE is a homogeneous floating point aggregate. If
4604 it is, return the mode of the floating point type that appears
4605 in all leafs. If it is not, return VOIDmode.
4607 An aggregate is a homogeneous floating point aggregate if all
4608 fields/elements in it have the same floating point type (e.g.,
4609 SFmode). 128-bit quad-precision floats are excluded.
4611 Variable sized aggregates should never arrive here, since we should
4612 have already decided to pass them by reference. Top-level zero-sized
4613 aggregates are excluded because our parallels crash the middle-end. */
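/* For illustration, two hypothetical C types (they are not used
   anywhere in GCC itself):
     struct hfa { double x, y, z; };      -- every leaf is DFmode, so
                                             hfa_element_mode returns DFmode;
     struct mix { double x; long tag; };  -- mixed leaf types, so
                                             VOIDmode is returned.  */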
4616 hfa_element_mode (const_tree type, bool nested)
4618 machine_mode element_mode = VOIDmode;
4620 enum tree_code code = TREE_CODE (type);
4621 int know_element_mode = 0;
4624 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4629 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4630 case BOOLEAN_TYPE: case POINTER_TYPE:
4631 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4632 case LANG_TYPE: case FUNCTION_TYPE:
4635 /* Fortran complex types are supposed to be HFAs, so we need to handle
4636 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4639 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4640 && TYPE_MODE (type) != TCmode)
4641 return GET_MODE_INNER (TYPE_MODE (type));
4646 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4647 mode if this is contained within an aggregate. */
4648 if (nested && TYPE_MODE (type) != TFmode)
4649 return TYPE_MODE (type);
4654 return hfa_element_mode (TREE_TYPE (type), 1);
4658 case QUAL_UNION_TYPE:
4659 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4661 if (TREE_CODE (t) != FIELD_DECL)
4664 mode = hfa_element_mode (TREE_TYPE (t), 1);
4665 if (know_element_mode)
4667 if (mode != element_mode)
4670 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4674 know_element_mode = 1;
4675 element_mode = mode;
4678 return element_mode;
4681 /* If we reach here, we probably have some front-end specific type
4682 that the backend doesn't know about. This can happen via the
4683 aggregate_value_p call in init_function_start. All we can do is
4684 ignore unknown tree types. */
4691 /* Return the number of words required to hold a quantity of TYPE and MODE
4692 when passed as an argument. */
4694 ia64_function_arg_words (const_tree type, machine_mode mode)
4698 if (mode == BLKmode)
4699 words = int_size_in_bytes (type);
4701 words = GET_MODE_SIZE (mode);
4703 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
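/* For instance, with UNITS_PER_WORD == 8 on IA-64, a 20-byte BLKmode
   aggregate needs (20 + 8 - 1) / 8 == 3 argument words.  */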
4706 /* Return the number of registers that should be skipped so the current
4707 argument (described by TYPE and WORDS) will be properly aligned.
4709 Integer and float arguments larger than 8 bytes start at the next
4710 even boundary. Aggregates larger than 8 bytes start at the next
4711 even boundary if the aggregate has 16 byte alignment. Note that
4712 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4713 but are still to be aligned in registers.
4715 ??? The ABI does not specify how to handle aggregates with
4716 alignment from 9 to 15 bytes, or greater than 16. We handle them
4717 all as if they had 16 byte alignment. Such aggregates can occur
4718 only if gcc extensions are used. */
4720 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4721 const_tree type, int words)
4723 /* No registers are skipped on VMS. */
4724 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4728 && TREE_CODE (type) != INTEGER_TYPE
4729 && TREE_CODE (type) != REAL_TYPE)
4730 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
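/* For instance (a hypothetical call site): if cum->words is already
   odd and the incoming argument is a 16-byte-aligned aggregate, then
   TYPE_ALIGN exceeds 64 bits and one register slot is skipped so the
   argument starts on an even slot; in all other cases no slot is
   skipped and 0 is returned.  */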
4735 /* Return rtx for register where argument is passed, or zero if it is passed
4737 /* ??? 128-bit quad-precision floats are always passed in general
4741 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4742 const_tree type, bool named, bool incoming)
4744 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4746 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4747 int words = ia64_function_arg_words (type, mode);
4748 int offset = ia64_function_arg_offset (cum, type, words);
4749 machine_mode hfa_mode = VOIDmode;
4751 /* For OPEN VMS, emit the instruction setting up the argument register here,
4752 when we know this will be together with the other arguments setup related
4753 insns. This is not the conceptually best place to do this, but this is
4754 the easiest as we have convenient access to cumulative args info. */
4756 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4759 unsigned HOST_WIDE_INT regval = cum->words;
4762 for (i = 0; i < 8; i++)
4763 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4765 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4769 /* If all argument slots are used, then it must go on the stack. */
4770 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4773 /* On OpenVMS argument is either in Rn or Fn. */
4774 if (TARGET_ABI_OPEN_VMS)
4776 if (FLOAT_MODE_P (mode))
4777 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4779 return gen_rtx_REG (mode, basereg + cum->words);
4782 /* Check for and handle homogeneous FP aggregates. */
4784 hfa_mode = hfa_element_mode (type, 0);
4786 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4787 and unprototyped hfas are passed specially. */
4788 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4792 int fp_regs = cum->fp_regs;
4793 int int_regs = cum->words + offset;
4794 int hfa_size = GET_MODE_SIZE (hfa_mode);
4798 /* If prototyped, pass it in FR regs then GR regs.
4799 If not prototyped, pass it in both FR and GR regs.
4801 If this is an SFmode aggregate, then it is possible to run out of
4802 FR regs while GR regs are still left. In that case, we pass the
4803 remaining part in the GR regs. */
4805 /* Fill the FP regs. We do this always. We stop if we reach the end
4806 of the argument, the last FP register, or the last argument slot. */
4808 byte_size = ((mode == BLKmode)
4809 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4810 args_byte_size = int_regs * UNITS_PER_WORD;
4812 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4813 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4815 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4816 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4820 args_byte_size += hfa_size;
4824 /* If no prototype, then the whole thing must go in GR regs. */
4825 if (! cum->prototype)
4827 /* If this is an SFmode aggregate, then we might have some left over
4828 that needs to go in GR regs. */
4829 else if (byte_size != offset)
4830 int_regs += offset / UNITS_PER_WORD;
4832 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4834 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4836 machine_mode gr_mode = DImode;
4837 unsigned int gr_size;
4839 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4840 then this goes in a GR reg left adjusted/little endian, right
4841 adjusted/big endian. */
4842 /* ??? Currently this is handled wrong, because 4-byte hunks are
4843 always right adjusted/little endian. */
4846 /* If we have an even 4 byte hunk because the aggregate is a
4847 multiple of 4 bytes in size, then this goes in a GR reg right
4848 adjusted/little endian. */
4849 else if (byte_size - offset == 4)
4852 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4853 gen_rtx_REG (gr_mode, (basereg
4857 gr_size = GET_MODE_SIZE (gr_mode);
4859 if (gr_size == UNITS_PER_WORD
4860 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4862 else if (gr_size > UNITS_PER_WORD)
4863 int_regs += gr_size / UNITS_PER_WORD;
4865 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4868 /* Integral and aggregates go in general registers. If we have run out of
4869 FR registers, then FP values must also go in general registers. This can
4870 happen when we have a SFmode HFA. */
4871 else if (mode == TFmode || mode == TCmode
4872 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4874 int byte_size = ((mode == BLKmode)
4875 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4876 if (BYTES_BIG_ENDIAN
4877 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4878 && byte_size < UNITS_PER_WORD
4881 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4882 gen_rtx_REG (DImode,
4883 (basereg + cum->words
4886 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4889 return gen_rtx_REG (mode, basereg + cum->words + offset);
4893 /* If there is a prototype, then FP values go in a FR register when
4894 named, and in a GR register when unnamed. */
4895 else if (cum->prototype)
4898 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4899 /* In big-endian mode, an anonymous SFmode value must be represented
4900 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4901 the value into the high half of the general register. */
4902 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4903 return gen_rtx_PARALLEL (mode,
4905 gen_rtx_EXPR_LIST (VOIDmode,
4906 gen_rtx_REG (DImode, basereg + cum->words + offset),
4909 return gen_rtx_REG (mode, basereg + cum->words + offset);
4911 /* If there is no prototype, then FP values go in both FR and GR
4915 /* See comment above. */
4916 machine_mode inner_mode =
4917 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4919 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4920 gen_rtx_REG (mode, (FR_ARG_FIRST
4923 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4924 gen_rtx_REG (inner_mode,
4925 (basereg + cum->words
4929 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4933 /* Implement TARGET_FUNCTION_ARG target hook. */
4936 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4937 const_tree type, bool named)
4939 return ia64_function_arg_1 (cum, mode, type, named, false);
4942 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4945 ia64_function_incoming_arg (cumulative_args_t cum,
4947 const_tree type, bool named)
4949 return ia64_function_arg_1 (cum, mode, type, named, true);
4952 /* Return number of bytes, at the beginning of the argument, that must be
4953 put in registers. 0 if the argument is entirely in registers or entirely
4957 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4958 tree type, bool named ATTRIBUTE_UNUSED)
4960 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4962 int words = ia64_function_arg_words (type, mode);
4963 int offset = ia64_function_arg_offset (cum, type, words);
4965 /* If all argument slots are used, then it must go on the stack. */
4966 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4969 /* It doesn't matter whether the argument goes in FR or GR regs. If
4970 it fits within the 8 argument slots, then it goes entirely in
4971 registers. If it extends past the last argument slot, then the rest
4972 goes on the stack. */
4974 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4977 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
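/* For instance (hypothetical values): with MAX_ARGUMENT_SLOTS == 8,
   cum->words == 6, offset == 0 and a 4-word argument, the first
   (8 - 6) * UNITS_PER_WORD == 16 bytes are passed in registers and
   the remaining 16 bytes go on the stack.  */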
4980 /* Return ivms_arg_type based on machine_mode. */
4982 static enum ivms_arg_type
4983 ia64_arg_type (machine_mode mode)
4996 /* Update CUM to point after this argument. This is patterned after
4997 ia64_function_arg. */
5000 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5001 const_tree type, bool named)
5003 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5004 int words = ia64_function_arg_words (type, mode);
5005 int offset = ia64_function_arg_offset (cum, type, words);
5006 machine_mode hfa_mode = VOIDmode;
5008 /* If all arg slots are already full, then there is nothing to do. */
5009 if (cum->words >= MAX_ARGUMENT_SLOTS)
5011 cum->words += words + offset;
5015 cum->atypes[cum->words] = ia64_arg_type (mode);
5016 cum->words += words + offset;
5018 /* On OpenVMS argument is either in Rn or Fn. */
5019 if (TARGET_ABI_OPEN_VMS)
5021 cum->int_regs = cum->words;
5022 cum->fp_regs = cum->words;
5026 /* Check for and handle homogeneous FP aggregates. */
5028 hfa_mode = hfa_element_mode (type, 0);
5030 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5031 and unprototyped hfas are passed specially. */
5032 if (hfa_mode != VOIDmode && (! cum->prototype || named))
5034 int fp_regs = cum->fp_regs;
5035 /* This is the original value of cum->words + offset. */
5036 int int_regs = cum->words - words;
5037 int hfa_size = GET_MODE_SIZE (hfa_mode);
5041 /* If prototyped, pass it in FR regs then GR regs.
5042 If not prototyped, pass it in both FR and GR regs.
5044 If this is an SFmode aggregate, then it is possible to run out of
5045 FR regs while GR regs are still left. In that case, we pass the
5046 remaining part in the GR regs. */
5048 /* Fill the FP regs. We do this always. We stop if we reach the end
5049 of the argument, the last FP register, or the last argument slot. */
5051 byte_size = ((mode == BLKmode)
5052 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5053 args_byte_size = int_regs * UNITS_PER_WORD;
5055 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5056 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5059 args_byte_size += hfa_size;
5063 cum->fp_regs = fp_regs;
5066 /* Integral and aggregates go in general registers. So do TFmode FP values.
5067 If we have run out of FR registers, then other FP values must also go in
5068 general registers. This can happen when we have a SFmode HFA. */
5069 else if (mode == TFmode || mode == TCmode
5070 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5071 cum->int_regs = cum->words;
5073 /* If there is a prototype, then FP values go in a FR register when
5074 named, and in a GR register when unnamed. */
5075 else if (cum->prototype)
5078 cum->int_regs = cum->words;
5080 /* ??? Complex types should not reach here. */
5081 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5083 /* If there is no prototype, then FP values go in both FR and GR
5087 /* ??? Complex types should not reach here. */
5088 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5089 cum->int_regs = cum->words;
5093 /* Arguments with alignment larger than 8 bytes start at the next even
5094 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
5095 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5098 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5100 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5101 return PARM_BOUNDARY * 2;
5105 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5106 return PARM_BOUNDARY * 2;
5108 return PARM_BOUNDARY;
5111 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5112 return PARM_BOUNDARY * 2;
5114 return PARM_BOUNDARY;
5117 /* True if it is OK to do sibling call optimization for the specified
5118 call expression EXP. DECL will be the called function, or NULL if
5119 this is an indirect call. */
5121 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5123 /* We can't perform a sibcall if the current function has the syscall_linkage
5125 if (lookup_attribute ("syscall_linkage",
5126 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5129 /* We must always return with our current GP. This means we can
5130 only sibcall to functions defined in the current module unless
5131 TARGET_CONST_GP is set to true. */
5132 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5136 /* Implement va_arg. */
5139 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5142 /* Variable sized types are passed by reference. */
5143 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5145 tree ptrtype = build_pointer_type (type);
5146 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5147 return build_va_arg_indirect_ref (addr);
5150 /* Aggregate arguments with alignment larger than 8 bytes start at
5151 the next even boundary. Integer and floating point arguments
5152 do so if they are larger than 8 bytes, whether or not they are
5153 also aligned larger than 8 bytes. */
5154 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5155 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5157 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5158 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5159 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5160 gimplify_assign (unshare_expr (valist), t, pre_p);
5163 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5166 /* Return 1 if function return value returned in memory. Return 0 if it is
5170 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5173 machine_mode hfa_mode;
5174 HOST_WIDE_INT byte_size;
5176 mode = TYPE_MODE (valtype);
5177 byte_size = GET_MODE_SIZE (mode);
5178 if (mode == BLKmode)
5180 byte_size = int_size_in_bytes (valtype);
5185 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5187 hfa_mode = hfa_element_mode (valtype, 0);
5188 if (hfa_mode != VOIDmode)
5190 int hfa_size = GET_MODE_SIZE (hfa_mode);
5192 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5197 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5203 /* Return rtx for register that holds the function return value. */
5206 ia64_function_value (const_tree valtype,
5207 const_tree fn_decl_or_type,
5208 bool outgoing ATTRIBUTE_UNUSED)
5211 machine_mode hfa_mode;
5213 const_tree func = fn_decl_or_type;
5216 && !DECL_P (fn_decl_or_type))
5219 mode = TYPE_MODE (valtype);
5220 hfa_mode = hfa_element_mode (valtype, 0);
5222 if (hfa_mode != VOIDmode)
5230 hfa_size = GET_MODE_SIZE (hfa_mode);
5231 byte_size = ((mode == BLKmode)
5232 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5234 for (i = 0; offset < byte_size; i++)
5236 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5237 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5241 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5243 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5244 return gen_rtx_REG (mode, FR_ARG_FIRST);
5247 bool need_parallel = false;
5249 /* In big-endian mode, we need to manage the layout of aggregates
5250 in the registers so that we get the bits properly aligned in
5251 the highpart of the registers. */
5252 if (BYTES_BIG_ENDIAN
5253 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5254 need_parallel = true;
5256 /* Something like struct S { long double x; char a[0] } is not an
5257 HFA structure, and therefore doesn't go in fp registers. But
5258 the middle-end will give it XFmode anyway, and XFmode values
5259 don't normally fit in integer registers. So we need to smuggle
5260 the value inside a parallel. */
5261 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5262 need_parallel = true;
5272 bytesize = int_size_in_bytes (valtype);
5273 /* An empty PARALLEL is invalid here, but the return value
5274 doesn't matter for empty structs. */
5276 return gen_rtx_REG (mode, GR_RET_FIRST);
5277 for (i = 0; offset < bytesize; i++)
5279 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5280 gen_rtx_REG (DImode,
5283 offset += UNITS_PER_WORD;
5285 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5288 mode = promote_function_mode (valtype, mode, &unsignedp,
5289 func ? TREE_TYPE (func) : NULL_TREE,
5292 return gen_rtx_REG (mode, GR_RET_FIRST);
5296 /* Worker function for TARGET_LIBCALL_VALUE. */
5299 ia64_libcall_value (machine_mode mode,
5300 const_rtx fun ATTRIBUTE_UNUSED)
5302 return gen_rtx_REG (mode,
5303 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5304 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5305 && (mode) != TFmode)
5306 ? FR_RET_FIRST : GR_RET_FIRST));
5309 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5312 ia64_function_value_regno_p (const unsigned int regno)
5314 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5315 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5318 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5319 We need to emit DTP-relative relocations. */
5322 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5324 gcc_assert (size == 4 || size == 8);
5326 fputs ("\tdata4.ua\t@dtprel(", file);
5328 fputs ("\tdata8.ua\t@dtprel(", file);
5329 output_addr_const (file, x);
5333 /* Print a memory address as an operand to reference that memory location. */
5335 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5336 also call this from ia64_print_operand for memory addresses. */
5339 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5340 machine_mode /*mode*/,
5341 rtx address ATTRIBUTE_UNUSED)
5345 /* Print an operand to an assembler instruction.
5346 C Swap and print a comparison operator.
5347 D Print an FP comparison operator.
5348 E Print 32 - constant, for SImode shifts as extract.
5349 e Print 64 - constant, for DImode rotates.
5350 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5351 a floating point register emitted normally.
5352 G A floating point constant.
5353 I Invert a predicate register by adding 1.
5354 J Select the proper predicate register for a condition.
5355 j Select the inverse predicate register for a condition.
5356 O Append .acq for volatile load.
5357 P Postincrement of a MEM.
5358 Q Append .rel for volatile store.
5359 R Print .s .d or nothing for a single, double or no truncation.
5360 S Shift amount for shladd instruction.
5361 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5362 for Intel assembler.
5363 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5364 for Intel assembler.
5365 X A pair of floating point registers.
5366 r Print register name, or constant 0 as r0. HP compatibility for
5368 v Print vector constant value as an 8-byte integer value. */
5371 ia64_print_operand (FILE * file, rtx x, int code)
5378 /* Handled below. */
5383 enum rtx_code c = swap_condition (GET_CODE (x));
5384 fputs (GET_RTX_NAME (c), file);
5389 switch (GET_CODE (x))
5416 str = GET_RTX_NAME (GET_CODE (x));
5423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5431 if (x == CONST0_RTX (GET_MODE (x)))
5432 str = reg_names[FR_REG (0)];
5433 else if (x == CONST1_RTX (GET_MODE (x)))
5434 str = reg_names[FR_REG (1)];
5437 gcc_assert (GET_CODE (x) == REG);
5438 str = reg_names[REGNO (x)];
5446 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5447 if (GET_MODE (x) == SFmode)
5448 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5449 else if (GET_MODE (x) == DFmode)
5450 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5452 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5455 output_operand_lossage ("invalid %%G mode");
5460 fputs (reg_names[REGNO (x) + 1], file);
5466 unsigned int regno = REGNO (XEXP (x, 0));
5467 if (GET_CODE (x) == EQ)
5471 fputs (reg_names[regno], file);
5476 if (MEM_VOLATILE_P (x))
5477 fputs(".acq", file);
5482 HOST_WIDE_INT value;
5484 switch (GET_CODE (XEXP (x, 0)))
5490 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5491 if (GET_CODE (x) == CONST_INT)
5495 gcc_assert (GET_CODE (x) == REG);
5496 fprintf (file, ", %s", reg_names[REGNO (x)]);
5502 value = GET_MODE_SIZE (GET_MODE (x));
5506 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5510 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5515 if (MEM_VOLATILE_P (x))
5516 fputs(".rel", file);
5520 if (x == CONST0_RTX (GET_MODE (x)))
5522 else if (x == CONST1_RTX (GET_MODE (x)))
5524 else if (x == CONST2_RTX (GET_MODE (x)))
5527 output_operand_lossage ("invalid %%R value");
5531 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5535 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5537 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5543 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5545 const char *prefix = "0x";
5546 if (INTVAL (x) & 0x80000000)
5548 fprintf (file, "0xffffffff");
5551 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5558 unsigned int regno = REGNO (x);
5559 fprintf (file, "%s, %s", reg_names[regno], reg_names[regno + 1]);
5564 /* If this operand is the constant zero, write it as register zero.
5565 Any register, zero, or CONST_INT value is OK here. */
5566 if (GET_CODE (x) == REG)
5567 fputs (reg_names[REGNO (x)], file);
5568 else if (x == CONST0_RTX (GET_MODE (x)))
5570 else if (GET_CODE (x) == CONST_INT)
5571 output_addr_const (file, x);
5573 output_operand_lossage ("invalid %%r value");
5577 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5578 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5585 /* For conditional branches, returns or calls, substitute
5586 sptk, dptk, dpnt, or spnt for %s. */
5587 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5590 int pred_val = profile_probability::from_reg_br_prob_note
5591 (XINT (x, 0)).to_reg_br_prob_base ();
5593 /* Guess top and bottom 10% statically predicted. */
5594 if (pred_val < REG_BR_PROB_BASE / 50
5595 && br_prob_note_reliable_p (x))
5597 else if (pred_val < REG_BR_PROB_BASE / 2)
5599 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5600 || !br_prob_note_reliable_p (x))
5605 else if (CALL_P (current_output_insn))
5610 fputs (which, file);
5615 x = current_insn_predicate;
5618 unsigned int regno = REGNO (XEXP (x, 0));
5619 if (GET_CODE (x) == EQ)
5621 fprintf (file, "(%s) ", reg_names[regno]);
5626 output_operand_lossage ("ia64_print_operand: unknown code");
5630 switch (GET_CODE (x))
5632 /* This happens for the spill/restore instructions. */
5640 fputs (reg_names[REGNO (x)], file);
5645 rtx addr = XEXP (x, 0);
5646 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5647 addr = XEXP (addr, 0);
5648 fprintf (file, "[%s]", reg_names[REGNO (addr)]);
5653 output_addr_const (file, x);
5660 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5663 ia64_print_operand_punct_valid_p (unsigned char code)
5665 return (code == '+' || code == ',');
5668 /* Compute a (partial) cost for rtx X. Return true if the complete
5669 cost has been computed, and false if subexpressions should be
5670 scanned. In either case, *TOTAL contains the cost result. */
5671 /* ??? This is incomplete. */
5674 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5675 int opno ATTRIBUTE_UNUSED,
5676 int *total, bool speed ATTRIBUTE_UNUSED)
5678 int code = GET_CODE (x);
5686 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5689 if (satisfies_constraint_I (x))
5691 else if (satisfies_constraint_J (x))
5694 *total = COSTS_N_INSNS (1);
5697 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5700 *total = COSTS_N_INSNS (1);
5705 *total = COSTS_N_INSNS (1);
5711 *total = COSTS_N_INSNS (3);
5715 *total = COSTS_N_INSNS (4);
5719 /* For multiplies wider than HImode, we have to go to the FPU,
5720 which normally involves copies. Plus there's the latency
5721 of the multiply itself, and the latency of the instructions to
5722 transfer integer regs to FP regs. */
5723 if (FLOAT_MODE_P (mode))
5724 *total = COSTS_N_INSNS (4);
5725 else if (GET_MODE_SIZE (mode) > 2)
5726 *total = COSTS_N_INSNS (10);
5728 *total = COSTS_N_INSNS (2);
5733 if (FLOAT_MODE_P (mode))
5735 *total = COSTS_N_INSNS (4);
5743 *total = COSTS_N_INSNS (1);
5750 /* We make divide expensive, so that divide-by-constant will be
5751 optimized to a multiply. */
5752 *total = COSTS_N_INSNS (60);
5760 /* Calculate the cost of moving data from a register in class FROM to
5761 one in class TO, using MODE. */
5764 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5767 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5768 if (to == ADDL_REGS)
5770 if (from == ADDL_REGS)
5773 /* All costs are symmetric, so reduce cases by putting the
5774 lower number class as the destination. */
5777 reg_class_t tmp = to;
5778 to = from, from = tmp;
5781 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5782 so that we get secondary memory reloads. Between FR_REGS,
5783 we have to make this at least as expensive as memory_move_cost
5784 to avoid spectacularly poor register class preferencing. */
5785 if (mode == XFmode || mode == RFmode)
5787 if (to != GR_REGS || from != GR_REGS)
5788 return memory_move_cost (mode, to, false);
5796 /* Moving between PR registers takes two insns. */
5797 if (from == PR_REGS)
5799 /* Moving between PR and anything but GR is impossible. */
5800 if (from != GR_REGS)
5801 return memory_move_cost (mode, to, false);
5805 /* Moving between BR and anything but GR is impossible. */
5806 if (from != GR_REGS && from != GR_AND_BR_REGS)
5807 return memory_move_cost (mode, to, false);
5812 /* Moving between AR and anything but GR is impossible. */
5813 if (from != GR_REGS)
5814 return memory_move_cost (mode, to, false);
5820 case GR_AND_FR_REGS:
5821 case GR_AND_BR_REGS:
5832 /* Calculate the cost of moving data of MODE from a register to or from
5836 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5838 bool in ATTRIBUTE_UNUSED)
5840 if (rclass == GENERAL_REGS
5841 || rclass == FR_REGS
5842 || rclass == FP_REGS
5843 || rclass == GR_AND_FR_REGS)
5849 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5850 on RCLASS to use when copying X into that class. */
5853 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5859 /* Don't allow volatile mem reloads into floating point registers.
5860 This is defined to force reload to choose the r/m case instead
5861 of the f/f case when reloading (set (reg fX) (mem/v)). */
5862 if (MEM_P (x) && MEM_VOLATILE_P (x))
5865 /* Force all unrecognized constants into the constant pool. */
5883 /* This function returns the register class required for a secondary
5884 register when copying between one of the registers in RCLASS, and X,
5885 using MODE. A return value of NO_REGS means that no secondary register
5889 ia64_secondary_reload_class (enum reg_class rclass,
5890 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5894 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5895 regno = true_regnum (x);
5902 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5903 interaction. We end up with two pseudos with overlapping lifetimes
5904 both of which are equiv to the same constant, and both which need
5905 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5906 changes depending on the path length, which means the qty_first_reg
5907 check in make_regs_eqv can give different answers at different times.
5908 At some point I'll probably need a reload_indi pattern to handle
5911 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5912 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5913 non-general registers for good measure. */
5914 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5917 /* This is needed if a pseudo used as a call_operand gets spilled to a
5919 if (GET_CODE (x) == MEM)
5925 /* Need to go through general registers to get to other class regs. */
5926 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5929 /* This can happen when a paradoxical subreg is an operand to the
5931 /* ??? This shouldn't be necessary after instruction scheduling is
5932 enabled, because paradoxical subregs are not accepted by
5933 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5934 stop the paradoxical subreg stupidity in the *_operand functions
5936 if (GET_CODE (x) == MEM
5937 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5938 || GET_MODE (x) == QImode))
5941 /* This can happen because of the ior/and/etc patterns that accept FP
5942 registers as operands. If the third operand is a constant, then it
5943 needs to be reloaded into a FP register. */
5944 if (GET_CODE (x) == CONST_INT)
5947 /* This can happen because of register elimination in a muldi3 insn.
5948 E.g. `26107 * (unsigned long)&u'. */
5949 if (GET_CODE (x) == PLUS)
5954 /* ??? This happens if we cse/gcse a BImode value across a call,
5955 and the function has a nonlocal goto. This is because global
5956 does not allocate call crossing pseudos to hard registers when
5957 crtl->has_nonlocal_goto is true. This is relatively
5958 common for C++ programs that use exceptions. To reproduce,
5959 return NO_REGS and compile libstdc++. */
5960 if (GET_CODE (x) == MEM)
5963 /* This can happen when we take a BImode subreg of a DImode value,
5964 and that DImode value winds up in some non-GR register. */
5965 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5977 /* Implement targetm.unspec_may_trap_p hook. */
5979 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5981 switch (XINT (x, 1))
5987 case UNSPEC_CHKACLR:
5989 /* These unspecs are just wrappers. */
5990 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5993 return default_unspec_may_trap_p (x, flags);
5997 /* Parse the -mfixed-range= option string. */
6000 fix_range (const char *const_str)
6003 char *str, *dash, *comma;
6005 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
6006 REG2 are either register names or register numbers. The effect
6007 of this option is to mark the registers in the range from REG1 to
6008 REG2 as ``fixed'' so they won't be used by the compiler. This is
6009 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
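/* For example, -mfixed-range=f32-f127 marks f32 through f127 as both
   fixed and call-used, which is what the loop at the end of this
   function does; a comma-separated list of ranges such as
   -mfixed-range=f32-f63,f96-f127 is also accepted, per the grammar
   described above.  */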
6011 i = strlen (const_str);
6012 str = (char *) alloca (i + 1);
6013 memcpy (str, const_str, i + 1);
6017 dash = strchr (str, '-');
6020 warning (0, "value of -mfixed-range must have form REG1-REG2");
6025 comma = strchr (dash + 1, ',');
6029 first = decode_reg_name (str);
6032 warning (0, "unknown register name: %s", str);
6036 last = decode_reg_name (dash + 1);
6039 warning (0, "unknown register name: %s", dash + 1);
6047 warning (0, "%s-%s is an empty range", str, dash + 1);
6051 for (i = first; i <= last; ++i)
6052 fixed_regs[i] = call_used_regs[i] = 1;
6062 /* Implement TARGET_OPTION_OVERRIDE. */
6065 ia64_option_override (void)
6068 cl_deferred_option *opt;
6069 vec<cl_deferred_option> *v
6070 = (vec<cl_deferred_option> *) ia64_deferred_options;
6073 FOR_EACH_VEC_ELT (*v, i, opt)
6075 switch (opt->opt_index)
6077 case OPT_mfixed_range_:
6078 fix_range (opt->arg);
6086 if (TARGET_AUTO_PIC)
6087 target_flags |= MASK_CONST_GP;
6089 /* Numerous experiments show that IRA based loop pressure
6090 calculation works better for RTL loop invariant motion on targets
6091 with enough (>= 32) registers. It is an expensive optimization.
6092 So it is on only for peak performance. */
6094 flag_ira_loop_pressure = 1;
6097 ia64_section_threshold = (global_options_set.x_g_switch_value
6099 : IA64_DEFAULT_GVALUE);
6101 init_machine_status = ia64_init_machine_status;
6103 if (align_functions <= 0)
6104 align_functions = 64;
6105 if (align_loops <= 0)
6107 if (TARGET_ABI_OPEN_VMS)
6110 ia64_override_options_after_change();
6113 /* Implement targetm.override_options_after_change. */
6116 ia64_override_options_after_change (void)
6119 && !global_options_set.x_flag_selective_scheduling
6120 && !global_options_set.x_flag_selective_scheduling2)
6122 flag_selective_scheduling2 = 1;
6123 flag_sel_sched_pipelining = 1;
6125 if (mflag_sched_control_spec == 2)
6127 /* Control speculation is on by default for the selective scheduler,
6128 but not for the Haifa scheduler. */
6129 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6131 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6133 /* FIXME: remove this when we'd implement breaking autoinsns as
6134 a transformation. */
6135 flag_auto_inc_dec = 0;
6139 /* Initialize the record of emitted frame related registers. */
6141 void ia64_init_expanders (void)
6143 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6146 static struct machine_function *
6147 ia64_init_machine_status (void)
6149 return ggc_cleared_alloc<machine_function> ();
6152 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6153 static enum attr_type ia64_safe_type (rtx_insn *);
6155 static enum attr_itanium_class
6156 ia64_safe_itanium_class (rtx_insn *insn)
6158 if (recog_memoized (insn) >= 0)
6159 return get_attr_itanium_class (insn);
6160 else if (DEBUG_INSN_P (insn))
6161 return ITANIUM_CLASS_IGNORE;
6163 return ITANIUM_CLASS_UNKNOWN;
6166 static enum attr_type
6167 ia64_safe_type (rtx_insn *insn)
6169 if (recog_memoized (insn) >= 0)
6170 return get_attr_type (insn);
6172 return TYPE_UNKNOWN;
6175 /* The following collection of routines emit instruction group stop bits as
6176 necessary to avoid dependencies. */
6178 /* Need to track some additional registers as far as serialization is
6179 concerned so we can properly handle br.call and br.ret. We could
6180 make these registers visible to gcc, but since these registers are
6181 never explicitly used in gcc generated code, it seems wasteful to
6182 do so (plus it would make the call and return patterns needlessly
6184 #define REG_RP (BR_REG (0))
6185 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6186 /* This is used for volatile asms which may require a stop bit immediately
6187 before and after them. */
6188 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6189 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6190 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
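
/* A minimal illustration of the WRITE_COUNT transitions described above
   (informal sketch; register and predicate numbers are made up, and p6/p7
   are assumed to hold complementary values):

       (p6) mov r14 = r15   ;; WRITE_COUNT(r14): 0 -> 1, FIRST_PRED = p6
       (p7) mov r14 = r16   ;; complement of p6:  WRITE_COUNT(r14) -> 2
            mov r14 = r17   ;; r14 already at 2, so a stop bit is needed  */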
#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];

#if CHECKING_P
/* Bitmap whether a register has been written in the current insn.  */
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
			   / HOST_BITS_PER_WIDEST_FAST_INT];
static inline void
rws_insn_set (int regno)
{
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
  SET_HARD_REG_BIT (rws_insn, regno);
}

static inline int
rws_insn_test (int regno)
{
  return TEST_HARD_REG_BIT (rws_insn, regno);
}
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline int
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif
/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like a unconditional write and do not try to check
	     for complementary pred reg in earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
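
/* Informal usage sketch (an illustration, not a quote of the callers): when
   the walker in rtx_needs_barrier below reaches the destination register of
   a predicated SET, it effectively does

       flags.is_write = 1;
       need_barrier |= rws_access_regno (REGNO (dest), flags, REGNO (pred));

   so a second write under the complementary predicate is tolerated, while
   any later write to the same REGNO forces a stop bit.  */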
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = REG_NREGS (reg);

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	 nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	 doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	 an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	 this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for X.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
6542 /* Handle an access to rtx X of type FLAGS using predicate register
6543 PRED. Return 1 if this access creates a dependency with an earlier
6544 instruction in the same group. */
6547 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
6550 int is_complemented
= 0;
6551 int need_barrier
= 0;
6552 const char *format_ptr
;
6553 struct reg_flags new_flags
;
6561 switch (GET_CODE (x
))
6564 update_set_flags (x
, &new_flags
);
6565 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
);
6566 if (GET_CODE (SET_SRC (x
)) != CALL
)
6568 new_flags
.is_write
= 1;
6569 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
6574 new_flags
.is_write
= 0;
6575 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6577 /* Avoid multiple register writes, in case this is a pattern with
6578 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6579 if (! flags
.is_sibcall
&& ! rws_insn_test (REG_AR_CFM
))
6581 new_flags
.is_write
= 1;
6582 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
6583 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
6584 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6589 /* X is a predicated instruction. */
6591 cond
= COND_EXEC_TEST (x
);
6593 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
6595 if (GET_CODE (cond
) == EQ
)
6596 is_complemented
= 1;
6597 cond
= XEXP (cond
, 0);
6598 gcc_assert (GET_CODE (cond
) == REG
6599 && REGNO_REG_CLASS (REGNO (cond
)) == PR_REGS
);
6600 pred
= REGNO (cond
);
6601 if (is_complemented
)
6604 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
6605 return need_barrier
;
6609 /* Clobber & use are for earlier compiler-phases only. */
6614 /* We always emit stop bits for traditional asms. We emit stop bits
6615 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6616 if (GET_CODE (x
) != ASM_OPERANDS
6617 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
6619 /* Avoid writing the register multiple times if we have multiple
6620 asm outputs. This avoids a failure in rws_access_reg. */
6621 if (! rws_insn_test (REG_VOLATILE
))
6623 new_flags
.is_write
= 1;
6624 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
6629 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6630 We cannot just fall through here since then we would be confused
6631 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6632 traditional asms unlike their normal usage. */
6634 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
6635 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
6640 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6642 rtx pat
= XVECEXP (x
, 0, i
);
6643 switch (GET_CODE (pat
))
6646 update_set_flags (pat
, &new_flags
);
6647 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
);
6654 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6658 if (REG_P (XEXP (pat
, 0))
6659 && extract_asm_operands (x
) != NULL_RTX
6660 && REGNO (XEXP (pat
, 0)) != AR_UNAT_REGNUM
)
6662 new_flags
.is_write
= 1;
6663 need_barrier
|= rtx_needs_barrier (XEXP (pat
, 0),
6676 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6678 rtx pat
= XVECEXP (x
, 0, i
);
6679 if (GET_CODE (pat
) == SET
)
6681 if (GET_CODE (SET_SRC (pat
)) != CALL
)
6683 new_flags
.is_write
= 1;
6684 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
6688 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
6689 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6694 need_barrier
|= rtx_needs_barrier (SUBREG_REG (x
), flags
, pred
);
6697 if (REGNO (x
) == AR_UNAT_REGNUM
)
6699 for (i
= 0; i
< 64; ++i
)
6700 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
6703 need_barrier
= rws_access_reg (x
, flags
, pred
);
6707 /* Find the regs used in memory address computation. */
6708 new_flags
.is_write
= 0;
6709 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6712 case CONST_INT
: case CONST_DOUBLE
: case CONST_VECTOR
:
6713 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
6716 /* Operators with side-effects. */
6717 case POST_INC
: case POST_DEC
:
6718 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6720 new_flags
.is_write
= 0;
6721 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6722 new_flags
.is_write
= 1;
6723 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6727 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6729 new_flags
.is_write
= 0;
6730 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6731 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6732 new_flags
.is_write
= 1;
6733 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6736 /* Handle common unary and binary ops for efficiency. */
6737 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
6738 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
6739 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
6740 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
6741 case NE
: case EQ
: case GE
: case GT
: case LE
:
6742 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
6743 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6744 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6747 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
6748 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
6749 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
6750 case SQRT
: case FFS
: case POPCOUNT
:
6751 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6755 /* VEC_SELECT's second argument is a PARALLEL with integers that
6756 describe the elements selected. On ia64, those integers are
6757 always constants. Avoid walking the PARALLEL so that we don't
6758 get confused with "normal" parallels and then die. */
6759 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6763 switch (XINT (x
, 1))
6765 case UNSPEC_LTOFF_DTPMOD
:
6766 case UNSPEC_LTOFF_DTPREL
:
6768 case UNSPEC_LTOFF_TPREL
:
6770 case UNSPEC_PRED_REL_MUTEX
:
6771 case UNSPEC_PIC_CALL
:
6773 case UNSPEC_FETCHADD_ACQ
:
6774 case UNSPEC_FETCHADD_REL
:
6775 case UNSPEC_BSP_VALUE
:
6776 case UNSPEC_FLUSHRS
:
6777 case UNSPEC_BUNDLE_SELECTOR
:
6780 case UNSPEC_GR_SPILL
:
6781 case UNSPEC_GR_RESTORE
:
6783 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
6784 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
6786 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6787 new_flags
.is_write
= (XINT (x
, 1) == UNSPEC_GR_SPILL
);
6788 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
6793 case UNSPEC_FR_SPILL
:
6794 case UNSPEC_FR_RESTORE
:
6795 case UNSPEC_GETF_EXP
:
6796 case UNSPEC_SETF_EXP
:
6798 case UNSPEC_FR_SQRT_RECIP_APPROX
:
6799 case UNSPEC_FR_SQRT_RECIP_APPROX_RES
:
6804 case UNSPEC_CHKACLR
:
6806 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6809 case UNSPEC_FR_RECIP_APPROX
:
6811 case UNSPEC_COPYSIGN
:
6812 case UNSPEC_FR_RECIP_APPROX_RES
:
6813 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6814 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6817 case UNSPEC_CMPXCHG_ACQ
:
6818 case UNSPEC_CMPXCHG_REL
:
6819 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6820 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
6828 case UNSPEC_VOLATILE
:
6829 switch (XINT (x
, 1))
6832 /* Alloc must always be the first instruction of a group.
6833 We force this by always returning true. */
6834 /* ??? We might get better scheduling if we explicitly check for
6835 input/local/output register dependencies, and modify the
6836 scheduler so that alloc is always reordered to the start of
6837 the current group. We could then eliminate all of the
6838 first_instruction code. */
6839 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6841 new_flags
.is_write
= 1;
6842 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6845 case UNSPECV_SET_BSP
:
6846 case UNSPECV_PROBE_STACK_RANGE
:
6850 case UNSPECV_BLOCKAGE
:
6851 case UNSPECV_INSN_GROUP_BARRIER
:
6853 case UNSPECV_PSAC_ALL
:
6854 case UNSPECV_PSAC_NORMAL
:
6857 case UNSPECV_PROBE_STACK_ADDRESS
:
6858 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6867 new_flags
.is_write
= 0;
6868 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
6869 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6871 new_flags
.is_write
= 1;
6872 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6873 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6877 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
6878 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6879 switch (format_ptr
[i
])
6881 case '0': /* unused field */
6882 case 'i': /* integer */
6883 case 'n': /* note */
6884 case 'w': /* wide integer */
6885 case 's': /* pointer to string */
6886 case 'S': /* optional pointer to string */
6890 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
6895 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
6896 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
6905 return need_barrier
;
/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
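
/* Informal sketch of how these routines are typically driven (see
   emit_all_insn_group_barriers further below for the real loop):

       init_insn_group_barriers ();
       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	 if (NONDEBUG_INSN_P (insn) && group_barrier_needed (insn))
	   {
	     emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	     init_insn_group_barriers ();
	     group_barrier_needed (insn);
	   }

   i.e. the cumulative write state is rebuilt from scratch after every
   emitted stop bit.  */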
6918 /* Given the current state, determine whether a group barrier (a stop bit) is
6919 necessary before INSN. Return nonzero if so. This modifies the state to
6920 include the effects of INSN as a side-effect. */
6923 group_barrier_needed (rtx_insn
*insn
)
6926 int need_barrier
= 0;
6927 struct reg_flags flags
;
6929 memset (&flags
, 0, sizeof (flags
));
6930 switch (GET_CODE (insn
))
6937 /* A barrier doesn't imply an instruction group boundary. */
6941 memset (rws_insn
, 0, sizeof (rws_insn
));
6945 flags
.is_branch
= 1;
6946 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
6947 memset (rws_insn
, 0, sizeof (rws_insn
));
6949 /* Don't bundle a call following another call. */
6950 if ((pat
= prev_active_insn (insn
)) && CALL_P (pat
))
6956 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
6960 if (!ia64_spec_check_p (insn
))
6961 flags
.is_branch
= 1;
6963 /* Don't bundle a jump following a call. */
6964 if ((pat
= prev_active_insn (insn
)) && CALL_P (pat
))
6972 if (GET_CODE (PATTERN (insn
)) == USE
6973 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6974 /* Don't care about USE and CLOBBER "insns"---those are used to
6975 indicate to the optimizer that it shouldn't get rid of
6976 certain operations. */
6979 pat
= PATTERN (insn
);
6981 /* Ug. Hack hacks hacked elsewhere. */
6982 switch (recog_memoized (insn
))
6984 /* We play dependency tricks with the epilogue in order
6985 to get proper schedules. Undo this for dv analysis. */
6986 case CODE_FOR_epilogue_deallocate_stack
:
6987 case CODE_FOR_prologue_allocate_stack
:
6988 pat
= XVECEXP (pat
, 0, 0);
6991 /* The pattern we use for br.cloop confuses the code above.
6992 The second element of the vector is representative. */
6993 case CODE_FOR_doloop_end_internal
:
6994 pat
= XVECEXP (pat
, 0, 1);
6997 /* Doesn't generate code. */
6998 case CODE_FOR_pred_rel_mutex
:
6999 case CODE_FOR_prologue_use
:
7006 memset (rws_insn
, 0, sizeof (rws_insn
));
7007 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
7009 /* Check to see if the previous instruction was a volatile
7012 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
7020 if (first_instruction
&& important_for_bundling_p (insn
))
7023 first_instruction
= 0;
7026 return need_barrier
;
/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx_insn *insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}
7048 /* Scan the current function and insert stop bits as necessary to
7049 eliminate dependencies. This function assumes that a final
7050 instruction scheduling pass has been run which has already
7051 inserted most of the necessary stop bits. This function only
7052 inserts new ones at basic block boundaries, since these are
7053 invisible to the scheduler. */
7056 emit_insn_group_barriers (FILE *dump
)
7059 rtx_insn
*last_label
= 0;
7060 int insns_since_last_label
= 0;
7062 init_insn_group_barriers ();
7064 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7068 if (insns_since_last_label
)
7070 insns_since_last_label
= 0;
7072 else if (NOTE_P (insn
)
7073 && NOTE_KIND (insn
) == NOTE_INSN_BASIC_BLOCK
)
7075 if (insns_since_last_label
)
7077 insns_since_last_label
= 0;
7079 else if (NONJUMP_INSN_P (insn
)
7080 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
7081 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
7083 init_insn_group_barriers ();
7086 else if (NONDEBUG_INSN_P (insn
))
7088 insns_since_last_label
= 1;
7090 if (group_barrier_needed (insn
))
7095 fprintf (dump
, "Emitting stop before label %d\n",
7096 INSN_UID (last_label
));
7097 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
7100 init_insn_group_barriers ();
7108 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7109 This function has to emit all necessary group barriers. */
7112 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
7116 init_insn_group_barriers ();
7118 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7120 if (BARRIER_P (insn
))
7122 rtx_insn
*last
= prev_active_insn (insn
);
7126 if (JUMP_TABLE_DATA_P (last
))
7127 last
= prev_active_insn (last
);
7128 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7129 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
7131 init_insn_group_barriers ();
7133 else if (NONDEBUG_INSN_P (insn
))
7135 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
7136 init_insn_group_barriers ();
7137 else if (group_barrier_needed (insn
))
7139 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
7140 init_insn_group_barriers ();
7141 group_barrier_needed (insn
);
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the mentioned above array.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx_insn *);
/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}


/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx_insn *insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_prologue_allocate_stack_pr:
    case CODE_FOR_epilogue_deallocate_stack:
    case CODE_FOR_epilogue_deallocate_stack_pr:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
7277 /* Adjust the cost of a scheduling dependency.
7278 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7279 COST is the current cost, DW is dependency weakness. */
7281 ia64_adjust_cost (rtx_insn
*insn
, int dep_type1
, rtx_insn
*dep_insn
,
7284 enum reg_note dep_type
= (enum reg_note
) dep_type1
;
7285 enum attr_itanium_class dep_class
;
7286 enum attr_itanium_class insn_class
;
7288 insn_class
= ia64_safe_itanium_class (insn
);
7289 dep_class
= ia64_safe_itanium_class (dep_insn
);
7291 /* Treat true memory dependencies separately. Ignore apparent true
7292 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7293 if (dep_type
== REG_DEP_TRUE
7294 && (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
)
7295 && (insn_class
== ITANIUM_CLASS_BR
|| insn_class
== ITANIUM_CLASS_SCALL
))
7298 if (dw
== MIN_DEP_WEAK
)
7299 /* Store and load are likely to alias, use higher cost to avoid stall. */
7300 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST
);
7301 else if (dw
> MIN_DEP_WEAK
)
7303 /* Store and load are less likely to alias. */
7304 if (mflag_sched_fp_mem_deps_zero_cost
&& dep_class
== ITANIUM_CLASS_STF
)
7305 /* Assume there will be no cache conflict for floating-point data.
7306 For integer data, L1 conflict penalty is huge (17 cycles), so we
7307 never assume it will not cause a conflict. */
7313 if (dep_type
!= REG_DEP_OUTPUT
)
7316 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
7317 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
7323 /* Like emit_insn_before, but skip cycle_display notes.
7324 ??? When cycle display notes are implemented, update this. */
7327 ia64_emit_insn_before (rtx insn
, rtx_insn
*before
)
7329 emit_insn_before (insn
, before
);
7332 /* The following function marks insns who produce addresses for load
7333 and store insns. Such insns will be placed into M slots because it
7334 decrease latency time for Itanium1 (see function
7335 `ia64_produce_address_p' and the DFA descriptions). */
7338 ia64_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
7340 rtx_insn
*insn
, *next
, *next_tail
;
7342 /* Before reload, which_alternative is not set, which means that
7343 ia64_safe_itanium_class will produce wrong results for (at least)
7344 move instructions. */
7345 if (!reload_completed
)
7348 next_tail
= NEXT_INSN (tail
);
7349 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
7352 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
7354 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
7356 sd_iterator_def sd_it
;
7358 bool has_mem_op_consumer_p
= false;
7360 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
7362 enum attr_itanium_class c
;
7364 if (DEP_TYPE (dep
) != REG_DEP_TRUE
)
7367 next
= DEP_CON (dep
);
7368 c
= ia64_safe_itanium_class (next
);
7369 if ((c
== ITANIUM_CLASS_ST
7370 || c
== ITANIUM_CLASS_STF
)
7371 && ia64_st_address_bypass_p (insn
, next
))
7373 has_mem_op_consumer_p
= true;
7376 else if ((c
== ITANIUM_CLASS_LD
7377 || c
== ITANIUM_CLASS_FLD
7378 || c
== ITANIUM_CLASS_FLDP
)
7379 && ia64_ld_address_bypass_p (insn
, next
))
7381 has_mem_op_consumer_p
= true;
7386 insn
->call
= has_mem_op_consumer_p
;
7390 /* We're beginning a new block. Initialize data structures as necessary. */
7393 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
7394 int sched_verbose ATTRIBUTE_UNUSED
,
7395 int max_ready ATTRIBUTE_UNUSED
)
7397 if (flag_checking
&& !sel_sched_p () && reload_completed
)
7399 for (rtx_insn
*insn
= NEXT_INSN (current_sched_info
->prev_head
);
7400 insn
!= current_sched_info
->next_tail
;
7401 insn
= NEXT_INSN (insn
))
7402 gcc_assert (!SCHED_GROUP_P (insn
));
7404 last_scheduled_insn
= NULL
;
7405 init_insn_group_barriers ();
7408 memset (mem_ops_in_group
, 0, sizeof (mem_ops_in_group
));
7411 /* We're beginning a scheduling pass. Check assertion. */
7414 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
7415 int sched_verbose ATTRIBUTE_UNUSED
,
7416 int max_ready ATTRIBUTE_UNUSED
)
7418 gcc_assert (pending_data_specs
== 0);
7421 /* Scheduling pass is now finished. Free/reset static variable. */
7423 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
7424 int sched_verbose ATTRIBUTE_UNUSED
)
7426 gcc_assert (pending_data_specs
== 0);
/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static int
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
    && get_attr_check_load (insn) == CHECK_LOAD_NO);
}
/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   (taking account for 3-cycle cache reference postponing for stores: Intel
   Itanium 2 Reference Manual for Software Development and Optimization).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class) {
  case ITANIUM_CLASS_FLD:
  case ITANIUM_CLASS_LD:
    mem_ops_in_group[current_cycle % 4]++;
    break;
  case ITANIUM_CLASS_STF:
  case ITANIUM_CLASS_ST:
    mem_ops_in_group[(current_cycle + 3) % 4]++;
    break;
  default:;
  }
}
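
/* A small worked example of the modulo-4 window above (illustration only):
   with current_cycle == 5, a load bumps mem_ops_in_group[5 % 4], i.e. slot 1
   for the current cycle, while a store bumps mem_ops_in_group[(5 + 3) % 4],
   i.e. slot 0, which becomes the "current" slot again three cycles later,
   matching the delayed cache access of stores.  ia64_dfa_sched_reorder then
   compares the current slot against ia64_max_memory_insns to throttle the
   number of memory operations issued per cycle.  */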
7463 /* We are about to being issuing insns for this clock cycle.
7464 Override the default sort algorithm to better slot instructions. */
7467 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
7468 int *pn_ready
, int clock_var
,
7472 int n_ready
= *pn_ready
;
7473 rtx_insn
**e_ready
= ready
+ n_ready
;
7477 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
7479 if (reorder_type
== 0)
7481 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7483 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7484 if (insnp
< e_ready
)
7486 rtx_insn
*insn
= *insnp
;
7487 enum attr_type t
= ia64_safe_type (insn
);
7488 if (t
== TYPE_UNKNOWN
)
7490 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
7491 || asm_noperands (PATTERN (insn
)) >= 0)
7493 rtx_insn
*lowest
= ready
[n_asms
];
7494 ready
[n_asms
] = insn
;
7500 rtx_insn
*highest
= ready
[n_ready
- 1];
7501 ready
[n_ready
- 1] = insn
;
7508 if (n_asms
< n_ready
)
7510 /* Some normal insns to process. Skip the asms. */
7514 else if (n_ready
> 0)
7518 if (ia64_final_schedule
)
7521 int nr_need_stop
= 0;
7523 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7524 if (safe_group_barrier_needed (*insnp
))
7527 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
7529 if (reorder_type
== 0)
7532 /* Move down everything that needs a stop bit, preserving
7534 while (insnp
-- > ready
+ deleted
)
7535 while (insnp
>= ready
+ deleted
)
7537 rtx_insn
*insn
= *insnp
;
7538 if (! safe_group_barrier_needed (insn
))
7540 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7548 current_cycle
= clock_var
;
7549 if (reload_completed
&& mem_ops_in_group
[clock_var
% 4] >= ia64_max_memory_insns
)
7554 /* Move down loads/stores, preserving relative order. */
7555 while (insnp
-- > ready
+ moved
)
7556 while (insnp
>= ready
+ moved
)
7558 rtx_insn
*insn
= *insnp
;
7559 if (! is_load_p (insn
))
7561 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7572 /* We are about to being issuing insns for this clock cycle. Override
7573 the default sort algorithm to better slot instructions. */
7576 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
7577 int *pn_ready
, int clock_var
)
7579 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
7580 pn_ready
, clock_var
, 0);
7583 /* Like ia64_sched_reorder, but called after issuing each insn.
7584 Override the default sort algorithm to better slot instructions. */
7587 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
7588 int sched_verbose ATTRIBUTE_UNUSED
, rtx_insn
**ready
,
7589 int *pn_ready
, int clock_var
)
7591 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
7595 /* We are about to issue INSN. Return the number of insns left on the
7596 ready queue that can be issued this cycle. */
7599 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
7600 int sched_verbose ATTRIBUTE_UNUSED
,
7602 int can_issue_more ATTRIBUTE_UNUSED
)
7604 if (sched_deps_info
->generate_spec_deps
&& !sel_sched_p ())
7605 /* Modulo scheduling does not extend h_i_d when emitting
7606 new instructions. Don't use h_i_d, if we don't have to. */
7608 if (DONE_SPEC (insn
) & BEGIN_DATA
)
7609 pending_data_specs
++;
7610 if (CHECK_SPEC (insn
) & BEGIN_DATA
)
7611 pending_data_specs
--;
7614 if (DEBUG_INSN_P (insn
))
7617 last_scheduled_insn
= insn
;
7618 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
7619 if (reload_completed
)
7621 int needed
= group_barrier_needed (insn
);
7623 gcc_assert (!needed
);
7625 init_insn_group_barriers ();
7626 stops_p
[INSN_UID (insn
)] = stop_before_p
;
7629 record_memory_reference (insn
);
7634 /* We are choosing insn from the ready queue. Return zero if INSN
7638 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
7640 gcc_assert (insn
&& INSN_P (insn
));
7642 /* Size of ALAT is 32. As far as we perform conservative
7643 data speculation, we keep ALAT half-empty. */
7644 if (pending_data_specs
>= 16 && (TODO_SPEC (insn
) & BEGIN_DATA
))
7645 return ready_index
== 0 ? -1 : 1;
7647 if (ready_index
== 0)
7650 if ((!reload_completed
7651 || !safe_group_barrier_needed (insn
))
7652 && (!mflag_sched_mem_insns_hard_limit
7653 || !is_load_p (insn
)
7654 || mem_ops_in_group
[current_cycle
% 4] < ia64_max_memory_insns
))
7660 /* The following variable value is pseudo-insn used by the DFA insn
7661 scheduler to change the DFA state when the simulated clock is
7664 static rtx_insn
*dfa_pre_cycle_insn
;
7666 /* Returns 1 when a meaningful insn was scheduled between the last group
7667 barrier and LAST. */
7669 scheduled_good_insn (rtx_insn
*last
)
7671 if (last
&& recog_memoized (last
) >= 0)
7675 last
!= NULL
&& !NOTE_INSN_BASIC_BLOCK_P (last
)
7676 && !stops_p
[INSN_UID (last
)];
7677 last
= PREV_INSN (last
))
7678 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7679 the ebb we're scheduling. */
7680 if (INSN_P (last
) && recog_memoized (last
) >= 0)
7686 /* We are about to being issuing INSN. Return nonzero if we cannot
7687 issue it on given cycle CLOCK and return zero if we should not sort
7688 the ready queue on the next clock start. */
7691 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx_insn
*insn
, int last_clock
,
7692 int clock
, int *sort_p
)
7694 gcc_assert (insn
&& INSN_P (insn
));
7696 if (DEBUG_INSN_P (insn
))
7699 /* When a group barrier is needed for insn, last_scheduled_insn
7701 gcc_assert (!(reload_completed
&& safe_group_barrier_needed (insn
))
7702 || last_scheduled_insn
);
7704 if ((reload_completed
7705 && (safe_group_barrier_needed (insn
)
7706 || (mflag_sched_stop_bits_after_every_cycle
7707 && last_clock
!= clock
7708 && last_scheduled_insn
7709 && scheduled_good_insn (last_scheduled_insn
))))
7710 || (last_scheduled_insn
7711 && (CALL_P (last_scheduled_insn
)
7712 || unknown_for_bundling_p (last_scheduled_insn
))))
7714 init_insn_group_barriers ();
7716 if (verbose
&& dump
)
7717 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
7718 last_clock
== clock
? " + cycle advance" : "");
7721 current_cycle
= clock
;
7722 mem_ops_in_group
[current_cycle
% 4] = 0;
7724 if (last_clock
== clock
)
7726 state_transition (curr_state
, dfa_stop_insn
);
7727 if (TARGET_EARLY_STOP_BITS
)
7728 *sort_p
= (last_scheduled_insn
== NULL_RTX
7729 || ! CALL_P (last_scheduled_insn
));
7735 if (last_scheduled_insn
)
7737 if (unknown_for_bundling_p (last_scheduled_insn
))
7738 state_reset (curr_state
);
7741 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
7742 state_transition (curr_state
, dfa_stop_insn
);
7743 state_transition (curr_state
, dfa_pre_cycle_insn
);
7744 state_transition (curr_state
, NULL
);
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;
      stops_p = (char *) xrecalloc (stops_p, new_clocks_length,
				    clocks_length, 1);
      clocks_length = new_clocks_length;
    }
}
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;

/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}
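
/* Informal sketch of the expected life cycle of a scheduling context
   (illustration only; the selective scheduler drives these through the
   corresponding targetm.sched hooks):

       void *sc = ia64_alloc_sched_context ();
       ia64_init_sched_context (sc, false);   -- snapshot the global state
       ...
       ia64_set_sched_context (sc);           -- come back to this point
       ...
       ia64_clear_sched_context (sc);
       ia64_free_sched_context (sc);                                        */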
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
7857 typedef rtx (* gen_func_t
) (rtx
, rtx
);
7859 /* Return a function that will generate a load of mode MODE_NO
7860 with speculation types TS. */
7862 get_spec_load_gen_function (ds_t ts
, int mode_no
)
7864 static gen_func_t gen_ld_
[] = {
7874 gen_zero_extendqidi2
,
7875 gen_zero_extendhidi2
,
7876 gen_zero_extendsidi2
,
7879 static gen_func_t gen_ld_a
[] = {
7889 gen_zero_extendqidi2_advanced
,
7890 gen_zero_extendhidi2_advanced
,
7891 gen_zero_extendsidi2_advanced
,
7893 static gen_func_t gen_ld_s
[] = {
7894 gen_movbi_speculative
,
7895 gen_movqi_speculative
,
7896 gen_movhi_speculative
,
7897 gen_movsi_speculative
,
7898 gen_movdi_speculative
,
7899 gen_movsf_speculative
,
7900 gen_movdf_speculative
,
7901 gen_movxf_speculative
,
7902 gen_movti_speculative
,
7903 gen_zero_extendqidi2_speculative
,
7904 gen_zero_extendhidi2_speculative
,
7905 gen_zero_extendsidi2_speculative
,
7907 static gen_func_t gen_ld_sa
[] = {
7908 gen_movbi_speculative_advanced
,
7909 gen_movqi_speculative_advanced
,
7910 gen_movhi_speculative_advanced
,
7911 gen_movsi_speculative_advanced
,
7912 gen_movdi_speculative_advanced
,
7913 gen_movsf_speculative_advanced
,
7914 gen_movdf_speculative_advanced
,
7915 gen_movxf_speculative_advanced
,
7916 gen_movti_speculative_advanced
,
7917 gen_zero_extendqidi2_speculative_advanced
,
7918 gen_zero_extendhidi2_speculative_advanced
,
7919 gen_zero_extendsidi2_speculative_advanced
,
7921 static gen_func_t gen_ld_s_a
[] = {
7922 gen_movbi_speculative_a
,
7923 gen_movqi_speculative_a
,
7924 gen_movhi_speculative_a
,
7925 gen_movsi_speculative_a
,
7926 gen_movdi_speculative_a
,
7927 gen_movsf_speculative_a
,
7928 gen_movdf_speculative_a
,
7929 gen_movxf_speculative_a
,
7930 gen_movti_speculative_a
,
7931 gen_zero_extendqidi2_speculative_a
,
7932 gen_zero_extendhidi2_speculative_a
,
7933 gen_zero_extendsidi2_speculative_a
,
7938 if (ts
& BEGIN_DATA
)
7940 if (ts
& BEGIN_CONTROL
)
7945 else if (ts
& BEGIN_CONTROL
)
7947 if ((spec_info
->flags
& SEL_SCHED_SPEC_DONT_CHECK_CONTROL
)
7948 || ia64_needs_block_p (ts
))
7951 gen_ld
= gen_ld_s_a
;
7958 return gen_ld
[mode_no
];
/* Constants that help mapping 'machine_mode' to int.  */
enum
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8,

    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };

/* Return index of the MODE.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case E_BImode: return 0; /* SPEC_MODE_FIRST  */
    case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case E_HImode: return 2;
    case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case E_DImode: return 4;
    case E_SFmode: return 5;
    case E_DFmode: return 6;
    case E_XFmode: return 7;
    case E_TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:       return SPEC_MODE_INVALID;
    }
}
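
/* Worked example for the mapping above (illustration only): a plain DImode
   load keeps index 4, while a load that zero-extends a QImode memory operand
   into DImode starts from index 1 and get_mode_no_for_insn then adds
   SPEC_GEN_EXTEND_OFFSET, selecting the gen_zero_extendqidi2* slots of the
   speculative pattern tables earlier in this file.  */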
8000 /* Provide information about speculation capabilities. */
8002 ia64_set_sched_flags (spec_info_t spec_info
)
8004 unsigned int *flags
= &(current_sched_info
->flags
);
8006 if (*flags
& SCHED_RGN
8007 || *flags
& SCHED_EBB
8008 || *flags
& SEL_SCHED
)
8012 if ((mflag_sched_br_data_spec
&& !reload_completed
&& optimize
> 0)
8013 || (mflag_sched_ar_data_spec
&& reload_completed
))
8018 && ((mflag_sched_br_in_data_spec
&& !reload_completed
)
8019 || (mflag_sched_ar_in_data_spec
&& reload_completed
)))
8023 if (mflag_sched_control_spec
8025 || reload_completed
))
8027 mask
|= BEGIN_CONTROL
;
8029 if (!sel_sched_p () && mflag_sched_in_control_spec
)
8030 mask
|= BE_IN_CONTROL
;
8033 spec_info
->mask
= mask
;
8037 *flags
|= USE_DEPS_LIST
| DO_SPECULATION
;
8039 if (mask
& BE_IN_SPEC
)
8042 spec_info
->flags
= 0;
8044 if ((mask
& CONTROL_SPEC
)
8045 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec
)
8046 spec_info
->flags
|= SEL_SCHED_SPEC_DONT_CHECK_CONTROL
;
8048 if (sched_verbose
>= 1)
8049 spec_info
->dump
= sched_dump
;
8051 spec_info
->dump
= 0;
8053 if (mflag_sched_count_spec_in_critical_path
)
8054 spec_info
->flags
|= COUNT_SPEC_IN_CRITICAL_PATH
;
8058 spec_info
->mask
= 0;
8061 /* If INSN is an appropriate load return its mode.
8062 Return -1 otherwise. */
8064 get_mode_no_for_insn (rtx_insn
*insn
)
8066 rtx reg
, mem
, mode_rtx
;
8070 extract_insn_cached (insn
);
8072 /* We use WHICH_ALTERNATIVE only after reload. This will
8073 guarantee that reload won't touch a speculative insn. */
8075 if (recog_data
.n_operands
!= 2)
8078 reg
= recog_data
.operand
[0];
8079 mem
= recog_data
.operand
[1];
8081 /* We should use MEM's mode since REG's mode in presence of
8082 ZERO_EXTEND will always be DImode. */
8083 if (get_attr_speculable1 (insn
) == SPECULABLE1_YES
)
8084 /* Process non-speculative ld. */
8086 if (!reload_completed
)
8088 /* Do not speculate into regs like ar.lc. */
8089 if (!REG_P (reg
) || AR_REGNO_P (REGNO (reg
)))
8096 rtx mem_reg
= XEXP (mem
, 0);
8098 if (!REG_P (mem_reg
))
8104 else if (get_attr_speculable2 (insn
) == SPECULABLE2_YES
)
8106 gcc_assert (REG_P (reg
) && MEM_P (mem
));
8112 else if (get_attr_data_speculative (insn
) == DATA_SPECULATIVE_YES
8113 || get_attr_control_speculative (insn
) == CONTROL_SPECULATIVE_YES
8114 || get_attr_check_load (insn
) == CHECK_LOAD_YES
)
8115 /* Process speculative ld or ld.c. */
8117 gcc_assert (REG_P (reg
) && MEM_P (mem
));
8122 enum attr_itanium_class attr_class
= get_attr_itanium_class (insn
);
8124 if (attr_class
== ITANIUM_CLASS_CHK_A
8125 || attr_class
== ITANIUM_CLASS_CHK_S_I
8126 || attr_class
== ITANIUM_CLASS_CHK_S_F
)
8133 mode_no
= ia64_mode_to_int (GET_MODE (mode_rtx
));
8135 if (mode_no
== SPEC_MODE_INVALID
)
8138 extend_p
= (GET_MODE (reg
) != GET_MODE (mode_rtx
));
8142 if (!(SPEC_MODE_FOR_EXTEND_FIRST
<= mode_no
8143 && mode_no
<= SPEC_MODE_FOR_EXTEND_LAST
))
8146 mode_no
+= SPEC_GEN_EXTEND_OFFSET
;
8152 /* If X is an unspec part of a speculative load, return its code.
8153 Return -1 otherwise. */
8155 get_spec_unspec_code (const_rtx x
)
8157 if (GET_CODE (x
) != UNSPEC
)
8179 /* Implement skip_rtx_p hook. */
8181 ia64_skip_rtx_p (const_rtx x
)
8183 return get_spec_unspec_code (x
) != -1;
8186 /* If INSN is a speculative load, return its UNSPEC code.
8187 Return -1 otherwise. */
8189 get_insn_spec_code (const_rtx insn
)
8193 pat
= PATTERN (insn
);
8195 if (GET_CODE (pat
) == COND_EXEC
)
8196 pat
= COND_EXEC_CODE (pat
);
8198 if (GET_CODE (pat
) != SET
)
8201 reg
= SET_DEST (pat
);
8205 mem
= SET_SRC (pat
);
8206 if (GET_CODE (mem
) == ZERO_EXTEND
)
8207 mem
= XEXP (mem
, 0);
8209 return get_spec_unspec_code (mem
);
8212 /* If INSN is a speculative load, return a ds with the speculation types.
8213 Otherwise [if INSN is a normal instruction] return 0. */
8215 ia64_get_insn_spec_ds (rtx_insn
*insn
)
8217 int code
= get_insn_spec_code (insn
);
8226 return BEGIN_CONTROL
;
8229 return BEGIN_DATA
| BEGIN_CONTROL
;
8236 /* If INSN is a speculative load return a ds with the speculation types that
8238 Otherwise [if INSN is a normal instruction] return 0. */
8240 ia64_get_insn_checked_ds (rtx_insn
*insn
)
8242 int code
= get_insn_spec_code (insn
);
8247 return BEGIN_DATA
| BEGIN_CONTROL
;
8250 return BEGIN_CONTROL
;
8254 return BEGIN_DATA
| BEGIN_CONTROL
;
8261 /* If GEN_P is true, calculate the index of needed speculation check and return
8262 speculative pattern for INSN with speculative mode TS, machine mode
8263 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
8264 If GEN_P is false, just calculate the index of needed speculation check. */
8266 ia64_gen_spec_load (rtx insn
, ds_t ts
, int mode_no
)
8269 gen_func_t gen_load
;
8271 gen_load
= get_spec_load_gen_function (ts
, mode_no
);
8273 new_pat
= gen_load (copy_rtx (recog_data
.operand
[0]),
8274 copy_rtx (recog_data
.operand
[1]));
8276 pat
= PATTERN (insn
);
8277 if (GET_CODE (pat
) == COND_EXEC
)
8278 new_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
8285 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED
,
8286 ds_t ds ATTRIBUTE_UNUSED
)
8291 /* Implement targetm.sched.speculate_insn hook.
8292 Check if the INSN can be TS speculative.
8293 If 'no' - return -1.
8294 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8295 If current pattern of the INSN already provides TS speculation,
8298 ia64_speculate_insn (rtx_insn
*insn
, ds_t ts
, rtx
*new_pat
)
8303 gcc_assert (!(ts
& ~SPECULATIVE
));
8305 if (ia64_spec_check_p (insn
))
8308 if ((ts
& BE_IN_SPEC
)
8309 && !insn_can_be_in_speculative_p (insn
, ts
))
8312 mode_no
= get_mode_no_for_insn (insn
);
8314 if (mode_no
!= SPEC_MODE_INVALID
)
8316 if (ia64_get_insn_spec_ds (insn
) == ds_get_speculation_types (ts
))
8321 *new_pat
= ia64_gen_spec_load (insn
, ts
, mode_no
);
8330 /* Return a function that will generate a check for speculation TS with mode
8332 If simple check is needed, pass true for SIMPLE_CHECK_P.
8333 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8335 get_spec_check_gen_function (ds_t ts
, int mode_no
,
8336 bool simple_check_p
, bool clearing_check_p
)
8338 static gen_func_t gen_ld_c_clr
[] = {
8348 gen_zero_extendqidi2_clr
,
8349 gen_zero_extendhidi2_clr
,
8350 gen_zero_extendsidi2_clr
,
8352 static gen_func_t gen_ld_c_nc
[] = {
8362 gen_zero_extendqidi2_nc
,
8363 gen_zero_extendhidi2_nc
,
8364 gen_zero_extendsidi2_nc
,
8366 static gen_func_t gen_chk_a_clr
[] = {
8367 gen_advanced_load_check_clr_bi
,
8368 gen_advanced_load_check_clr_qi
,
8369 gen_advanced_load_check_clr_hi
,
8370 gen_advanced_load_check_clr_si
,
8371 gen_advanced_load_check_clr_di
,
8372 gen_advanced_load_check_clr_sf
,
8373 gen_advanced_load_check_clr_df
,
8374 gen_advanced_load_check_clr_xf
,
8375 gen_advanced_load_check_clr_ti
,
8376 gen_advanced_load_check_clr_di
,
8377 gen_advanced_load_check_clr_di
,
8378 gen_advanced_load_check_clr_di
,
8380 static gen_func_t gen_chk_a_nc
[] = {
8381 gen_advanced_load_check_nc_bi
,
8382 gen_advanced_load_check_nc_qi
,
8383 gen_advanced_load_check_nc_hi
,
8384 gen_advanced_load_check_nc_si
,
8385 gen_advanced_load_check_nc_di
,
8386 gen_advanced_load_check_nc_sf
,
8387 gen_advanced_load_check_nc_df
,
8388 gen_advanced_load_check_nc_xf
,
8389 gen_advanced_load_check_nc_ti
,
8390 gen_advanced_load_check_nc_di
,
8391 gen_advanced_load_check_nc_di
,
8392 gen_advanced_load_check_nc_di
,
8394 static gen_func_t gen_chk_s
[] = {
8395 gen_speculation_check_bi
,
8396 gen_speculation_check_qi
,
8397 gen_speculation_check_hi
,
8398 gen_speculation_check_si
,
8399 gen_speculation_check_di
,
8400 gen_speculation_check_sf
,
8401 gen_speculation_check_df
,
8402 gen_speculation_check_xf
,
8403 gen_speculation_check_ti
,
8404 gen_speculation_check_di
,
8405 gen_speculation_check_di
,
8406 gen_speculation_check_di
,
8409 gen_func_t
*gen_check
;
8411 if (ts
& BEGIN_DATA
)
8413 /* We don't need recovery because even if this is ld.sa
8414 ALAT entry will be allocated only if NAT bit is set to zero.
8415 So it is enough to use ld.c here. */
8419 gcc_assert (mflag_sched_spec_ldc
);
8421 if (clearing_check_p
)
8422 gen_check
= gen_ld_c_clr
;
8424 gen_check
= gen_ld_c_nc
;
8428 if (clearing_check_p
)
8429 gen_check
= gen_chk_a_clr
;
8431 gen_check
= gen_chk_a_nc
;
8434 else if (ts
& BEGIN_CONTROL
)
8437 /* We might want to use ld.sa -> ld.c instead of
8440 gcc_assert (!ia64_needs_block_p (ts
));
8442 if (clearing_check_p
)
8443 gen_check
= gen_ld_c_clr
;
8445 gen_check
= gen_ld_c_nc
;
8449 gen_check
= gen_chk_s
;
8455 gcc_assert (mode_no
>= 0);
8456 return gen_check
[mode_no
];
8459 /* Return nonzero, if INSN needs branchy recovery check. */
8461 ia64_needs_block_p (ds_t ts
)
8463 if (ts
& BEGIN_DATA
)
8464 return !mflag_sched_spec_ldc
;
8466 gcc_assert ((ts
& BEGIN_CONTROL
) != 0);
8468 return !(mflag_sched_spec_control_ldc
&& mflag_sched_spec_ldc
);
8471 /* Generate (or regenerate) a recovery check for INSN. */
8473 ia64_gen_spec_check (rtx_insn
*insn
, rtx_insn
*label
, ds_t ds
)
8475 rtx op1
, pat
, check_pat
;
8476 gen_func_t gen_check
;
8479 mode_no
= get_mode_no_for_insn (insn
);
8480 gcc_assert (mode_no
>= 0);
8486 gcc_assert (!ia64_needs_block_p (ds
));
8487 op1
= copy_rtx (recog_data
.operand
[1]);
8490 gen_check
= get_spec_check_gen_function (ds
, mode_no
, label
== NULL_RTX
,
8493 check_pat
= gen_check (copy_rtx (recog_data
.operand
[0]), op1
);
8495 pat
= PATTERN (insn
);
8496 if (GET_CODE (pat
) == COND_EXEC
)
8497 check_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
8503 /* Return nonzero, if X is branchy recovery check. */
8505 ia64_spec_check_p (rtx x
)
8508 if (GET_CODE (x
) == COND_EXEC
)
8509 x
= COND_EXEC_CODE (x
);
8510 if (GET_CODE (x
) == SET
)
8511 return ia64_spec_check_src_p (SET_SRC (x
));
8515 /* Return nonzero, if SRC belongs to recovery check. */
8517 ia64_spec_check_src_p (rtx src
)
8519 if (GET_CODE (src
) == IF_THEN_ELSE
)
8524 if (GET_CODE (t
) == NE
)
8528 if (GET_CODE (t
) == UNSPEC
)
8534 if (code
== UNSPEC_LDCCLR
8535 || code
== UNSPEC_LDCNC
8536 || code
== UNSPEC_CHKACLR
8537 || code
== UNSPEC_CHKANC
8538 || code
== UNSPEC_CHKS
)
8540 gcc_assert (code
!= 0);
8550 /* The following page contains abstract data `bundle states' which are
8551 used for bundling insns (inserting nops and template generation). */
8553 /* The following describes state of insn bundling. */
8557 /* Unique bundle state number to identify them in the debugging
8560 rtx_insn
*insn
; /* corresponding insn, NULL for the 1st and the last state */
8561 /* number nops before and after the insn */
8562 short before_nops_num
, after_nops_num
;
8563 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
8565 int cost
; /* cost of the state in cycles */
8566 int accumulated_insns_num
; /* number of all previous insns including
8567 nops. L is considered as 2 insns */
8568 int branch_deviation
; /* deviation of previous branches from 3rd slots */
8569 int middle_bundle_stops
; /* number of stop bits in the middle of bundles */
8570 struct bundle_state
*next
; /* next state with the same insn_num */
8571 struct bundle_state
*originator
; /* originator (previous insn state) */
8572 /* All bundle states are in the following chain. */
8573 struct bundle_state
*allocated_states_chain
;
8574 /* The DFA State after issuing the insn and the nops. */
8578 /* The following is map insn number to the corresponding bundle state. */
8580 static struct bundle_state
**index_to_bundle_states
;
8582 /* The unique number of next bundle state. */
8584 static int bundle_states_num
;
8586 /* All allocated bundle states are in the following chain. */
8588 static struct bundle_state
*allocated_bundle_states_chain
;
8590 /* All allocated but not used bundle states are in the following
8593 static struct bundle_state
*free_bundle_state_chain
;
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}
/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
/* Hashtable helpers.  */

struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
{
  static inline hashval_t hash (const bundle_state *);
  static inline bool equal (const bundle_state *, const bundle_state *);
};

/* The function returns hash of BUNDLE_STATE.  */

hashval_t
bundle_state_hasher::hash (const bundle_state *state)
{
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

bool
bundle_state_hasher::equal (const bundle_state *state1,
			    const bundle_state *state2)
{
  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}
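/* ??? The hash above mixes every byte of the DFA state into the result
   with a varying shift and then adds insn_num, so two states can hash
   alike only if their automaton state bytes and insn number agree --
   which is exactly what bundle_state_hasher::equal checks.  */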
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static hash_table<bundle_state_hasher> *bundle_state_table;

/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
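/* ??? To summarize the comparison chain above: a newly generated state
   displaces the hashed one only if it is strictly better in
   lexicographic order of (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops), all of which are minimized.  */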
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx_insn *insn, int try_bundle_end_p,
		     int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (!unknown_for_bundling_p (insn));

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (unknown_for_bundling_p (insn))
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}
/* The following function returns position in the two window bundle
   for given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
/* The function returns code of a possible template for given position
   and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search
   because of an undocumented anomaly in McKinley-derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
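/* ??? The small integers returned above are the bundle template numbers
   later passed to gen_bundle_selector; it is the ordering of the tests,
   not the numbering, that pushes the F-unit templates towards the end of
   the search as described in the comment before the function.  */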
/* True when INSN is important for bundling.  */

static bool
important_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns an insn important for insn bundling
   followed by INSN and before TAIL.  */

static rtx_insn *
get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL;
}

/* True when INSN is unknown, but important, for bundling.  */

static bool
unknown_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of following
	 bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses NDFA (non-deterministic
   finite automata) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automata permits following
   all possible insn sequences very fast.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automata states.  The
   automata only say that we can issue an insn possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using DFA
   (deterministic finite automata).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying information about new
   cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce memory for the algorithm.

   When we reach the end of EBB (extended basic block), we choose the
   best sequence and then, moving back in EBB, insert templates for
   the best alternative.  The templates are taken from querying
   automaton state for each insn in chosen bundle states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
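/* ??? Schematically, the forward pass below fills index_to_bundle_states
   like a dynamic-programming table:

     for each important insn I (in order)
       for each state S reachable after the previous insn
	 for nops in {2, 1, 0}
	   issue_nops_and_insn (S, nops, I, ...);

   where insert_bundle_state keeps only the best state per
   (dfa_state, insn_num) key, and the backward pass then walks the
   originator chain of the best final state, materializing the recorded
   nops and bundle templates.  */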
static void
bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx_insn *insn, *next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx_insn *b;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift cycle mark if it is put on insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& !important_for_bundling_p (insn)
	&& GET_MODE (insn) == TImode)
      {
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn != tail;
	     next_insn = NEXT_INSN (next_insn))
	  if (important_for_bundling_p (next_insn)
	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
	    {
	      PUT_MODE (next_insn, TImode);
	      break;
	    }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (important_for_bundling_p (insn));
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  Asm insn is always
	     placed in a separate bundle.  */
	  only_bundle_end_p
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && unknown_for_bundling_p (next_insn));
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	  bundle_end_p
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      || type == TYPE_S)
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
				 only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
			       only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
			       only_bundle_end_p);
	}
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	  {
	    /* This structure is taken from generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to .md file.  */
	    struct DFA_chip
	    {
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;
	    };

	    fprintf
	      (dump,
	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->cost,
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       curr_state->middle_bundle_stops,
	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	       INSN_UID (insn));
	  }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with fully filled up last
       bundle.  First we prefer insn sequences with minimal cost,
       then with minimal inserted nops, and finally with branch insns
       placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = unknown_for_bundling_p (insn);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  Two bundle window means that
	 the processor will make two bundle rotation.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have the stop inside a bundle.  */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    rtx nop_pat = gen_nop ();
	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have two bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  Group barrier has
	 no slot.  Asm insn takes all bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	pos--;
      /* Long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  rtx nop_pat = gen_nop ();
	  ia64_emit_insn_before (nop_pat, insn);
	  rtx_insn *nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

  if (flag_checking)
    {
      /* Assert right calculation of middle_bundle_stops.  */
      int num = best_state->middle_bundle_stops;
      bool start_bundle = true, end_bundle = false;

      for (insn = NEXT_INSN (prev_head_insn);
	   insn && insn != tail;
	   insn = NEXT_INSN (insn))
	{
	  if (!INSN_P (insn))
	    continue;
	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
	    start_bundle = true;
	  else
	    {
	      rtx_insn *next_insn;

	      for (next_insn = NEXT_INSN (insn);
		   next_insn && next_insn != tail;
		   next_insn = NEXT_INSN (next_insn))
		if (INSN_P (next_insn)
		    && (ia64_safe_itanium_class (next_insn)
			!= ITANIUM_CLASS_IGNORE
			|| recog_memoized (next_insn)
			== CODE_FOR_bundle_selector)
		    && GET_CODE (PATTERN (next_insn)) != USE
		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
		  break;

	      end_bundle = next_insn == NULL_RTX
		|| next_insn == tail
		|| (INSN_P (next_insn)
		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
		  && !start_bundle && !end_bundle
		  && next_insn
		  && !unknown_for_bundling_p (next_insn))
		num--;

	      start_bundle = false;
	    }
	}

      gcc_assert (num == 0);
    }

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
}
/* The following function is called at the end of scheduling BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
		current_sched_info->next_tail);
      if (sched_verbose && dump)
	fprintf (dump, "// finishing %d-%d\n",
		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  rtx_insn *last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    seen_good_insn = 1;
	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
	}
    }
}
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
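/* ??? The constants above are the multipass lookahead depth handed back
   to the scheduler: a deeper window (6) once reload has completed and
   the final, bundling-oriented schedule is being built, and a shallower
   one (4) before that.  The specific values look like tuning choices
   rather than architectural requirements.  */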
/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if INSN produces address for a
   load/store insn.  We will place such insns into M slot because it
   decreases its latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      int r;
      rtx_insn *head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (! LABEL_P (head))
	continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      rtx_insn *insn = BB_HEAD (bb);

      while (1)
	{
	  if (CALL_P (insn)
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx_insn *b =
		emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && flag_schedule_insns_after_reload
      && dbg_cnt (ia64_sched2))
    {
      basic_block bb;
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
		  bool unwind, bool frame ATTRIBUTE_UNUSED)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
}
/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */

static void
process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
			bool unwind, bool frame)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);

  if (dest == stack_pointer_rtx)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
			 -INTVAL (op1));
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (src == hard_frame_pointer_rtx);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}
    }
  else if (dest == hard_frame_pointer_rtx)
    {
      gcc_assert (src == stack_pointer_rtx);
      gcc_assert (frame_pointer_needed);

      if (unwind)
	fprintf (asm_out_file, "\t.vframe r%d\n",
		 ia64_dbx_register_number (REGNO (dest)));
    }
  else
    gcc_unreachable ();
}
/* This function processes a SET pattern for REG_CFA_REGISTER.  */

static void
process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int dest_regno = REGNO (dest);
  int src_regno;

  if (src == pc_rtx)
    {
      /* Saving return address pointer.  */
      if (unwind)
	fprintf (asm_out_file, "\t.save rp, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      return;
    }

  src_regno = REGNO (src);

  switch (src_regno)
    {
    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t.save pr, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
/* This function processes a SET pattern for REG_CFA_OFFSET.  */

static void
process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int src_regno = REGNO (src);
  const char *saveop;
  HOST_WIDE_INT off;
  rtx base;

  gcc_assert (MEM_P (dest));
  if (GET_CODE (XEXP (dest, 0)) == REG)
    {
      base = XEXP (dest, 0);
      off = 0;
    }
  else
    {
      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
      base = XEXP (XEXP (dest, 0), 0);
      off = INTVAL (XEXP (XEXP (dest, 0), 1));
    }

  if (base == hard_frame_pointer_rtx)
    {
      saveop = ".savepsp";
      off = - off;
    }
  else
    {
      gcc_assert (base == stack_pointer_rtx);
      saveop = ".savesp";
    }

  src_regno = REGNO (src);
  switch (src_regno)
    {
    case BR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_b0]);
      if (unwind)
	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case PR_REG (0):
      gcc_assert (!current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_LC_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_PFS_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
		 saveop, off);
      break;

    case GR_REG (4):
    case GR_REG (5):
    case GR_REG (6):
    case GR_REG (7):
      if (unwind)
	fprintf (asm_out_file, "\t.save.g 0x%x\n",
		 1 << (src_regno - GR_REG (4)));
      break;

    case BR_REG (1):
    case BR_REG (2):
    case BR_REG (3):
    case BR_REG (4):
    case BR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.b 0x%x\n",
		 1 << (src_regno - BR_REG (1)));
      break;

    case FR_REG (2):
    case FR_REG (3):
    case FR_REG (4):
    case FR_REG (5):
      if (unwind)
	fprintf (asm_out_file, "\t.save.f 0x%x\n",
		 1 << (src_regno - FR_REG (2)));
      break;

    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
      if (unwind)
	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		 1 << (src_regno - FR_REG (12)));
      break;

    default:
      /* ??? For some reason we mark other general registers, even those
	 we can't represent in the unwind info.  Ignore them.  */
      break;
    }
}
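/* ??? Summary of the directive selection in the two functions above:
   registers saved to another register get ".save reg, r<n>"
   (process_cfa_register), while registers saved to memory get either
   ".savesp" (offset from the stack pointer) or ".savepsp" (offset from
   the previous stack pointer when the frame pointer is the base), with
   the .save.g/.save.b/.save.f/.save.gf mask forms used for the preserved
   general, branch and floating registers.  */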
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

static void
ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
{
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
	== EXIT_BLOCK_PTR_FOR_FN (cfun);

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
	{
	  if (unwind)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state %d\n",
		       cfun->machine->state_num);
	    }
	  need_copy_state = false;
	}
    }

  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if haven't done process_epilogue() yet, do it now */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return;
    }

  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
	handled_one = true;
	break;

      case REG_CFA_OFFSET:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_offset (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_CFA_REGISTER:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_register (asm_out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
	/* Not used in the ia64 port.  */
	gcc_unreachable ();

      default:
	/* Not a frame-related note.  */
	break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
ia64_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
ia64_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}

/* Implement TARGET_DEBUG_UNWIND_INFO.  */

static enum unwind_info_type
ia64_debug_unwind_info (void)
{
  return UI_TARGET;
}
enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ,
  IA64_BUILTIN_NANQ,
  IA64_BUILTIN_NANSQ,
  IA64_BUILTIN_max
};

static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  if (float64x_type_node != NULL_TREE
      && TYPE_MODE (float64x_type_node) == XFmode)
    float80_type = float64x_type_node;
  else
    {
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
    }
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree const_string_type
	= build_pointer_type (build_qualified_type
			      (char_type_node, TYPE_QUAL_CONST));

      (*lang_hooks.types.register_builtin_type) (float128_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (float128_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					const_string_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_nanq", ftype,
				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
				   "nanq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANQ] = decl;

      decl = add_builtin_function ("__builtin_nansq", ftype,
				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
				   "nansq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
		       NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}
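/* ??? The set_user_assembler_name calls above presumably redirect the
   __builtin_isfinite family to the HP-UX math library entry points
   (_Isfinite, _Isfinitef, _Isfinitef128) instead of the default
   library names used elsewhere.  */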
static tree
ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ia64_builtins fn_code = (enum ia64_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case IA64_BUILTIN_NANQ:
	case IA64_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IA64_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
static rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_NANQ:
    case IA64_BUILTIN_NANSQ:
    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}
/* Implement TARGET_FUNCTION_ARG_PADDING.

   For HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

static pad_direction
ia64_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */
  if (TARGET_HPUX
      && type
      && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return PAD_UPWARD;

  /* Fall back to the default.  */
  return default_function_arg_padding (mode, type);
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}
10692 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
10693 modes of word_mode and larger. Rename the TFmode libfuncs using the
10694 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10695 backward compatibility. */
10698 ia64_init_libfuncs (void)
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
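
/* Libfunc setup for the case where TFmode support comes from soft-fp
   (see ia64_sysv4_init_libfuncs above).  */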
static void
ia64_soft_fp_init_libfuncs (void)
{
  cmptf_libfunc = init_one_libfunc ("__cmptf2");
}
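
/* On VMS, both 32-bit (SImode) and 64-bit (DImode) pointers are valid.  */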
static bool
ia64_vms_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || mode == DImode);
}
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data goes in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
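
/* Compute the section flags for section NAME: small-data and small-bss
   sections get SECTION_SMALL in addition to the default flags.  */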
static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && lang_GNU_CXX ());
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);

  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
        {
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this_rtx, tmp));
    }

  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
          if (satisfies_constraint_I (vcall_offset_rtx))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
        {
          if (!satisfies_constraint_J (vcall_offset_rtx))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
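
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */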
ia64_scalar_mode_supported_p (scalar_mode mode)
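
/* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  */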
ia64_vector_mode_supported_p (machine_mode mode)
/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);

  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);

  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
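
/* Cached SYMBOL_REF for _mcount; created lazily by gen_mcount_func_rtx.  */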
static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}
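
/* Emit the profiling call for counter label LABELNO: pass the return
   address, the current IP and (unless NO_PROFILE_COUNTERS) the counter
   label to _mcount.  */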
void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }

  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                  tree name ATTRIBUTE_UNUSED,
                                  tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
/* Target hook for c_mode_for_suffix.  */

static machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
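
/* Cached DFmode CONST_DOUBLE for 0.5, built lazily below.  */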
static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}
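
/* Likewise for 0.375, also as a DFmode constant.  */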
static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}
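
/* Return the raw mode to use for register REGNO; FR registers are
   special-cased, everything else uses the default.  */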
static machine_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode (regno);
}
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   anymore.  */

static bool
ia64_member_type_forces_blk (const_tree, machine_mode mode)
{
  return TARGET_HPUX && mode == TFmode;
}
/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
                            enum node_frequency freq ATTRIBUTE_UNUSED,
                            bool startup ATTRIBUTE_UNUSED,
                            bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}
/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
        return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
        return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}
/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
        elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
                            GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)       /* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & (h1 | h3)) == contents)  /* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0x5555) == contents)     /* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0xaaaa) == contents)     /* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i | 1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i + shift) & (2 * nelt - 1);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned int i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}
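
/* Expand a constant permutation: OPERANDS[0] is the target, OPERANDS[1]
   and OPERANDS[2] the input vectors, OPERANDS[3] the constant selector.  */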
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      d.one_operand_p = true;
      break;

    case 1:
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
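
/* Expand a V2SFmode vec_set: insert OPERANDS[1] into OPERANDS[0] at
   element position OPERANDS[2], via fpack plus a constant permutation.  */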
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
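
/* Extract the even (ODD == 0) or odd (ODD == 1) elements of OP0 and OP1
   into TARGET.  */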
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In BR regs, we can't change the DImode at all.
   In FP regs, we can't change FP values to integer values and vice versa,
   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */

static bool
ia64_can_change_mode_class (machine_mode from, machine_mode to,
                            reg_class_t rclass)
{
  if (reg_classes_intersect_p (rclass, BR_REGS))
    return from == to;
  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
    return !reg_classes_intersect_p (rclass, FR_REGS);
  return true;
}
#include "gt-ia64.h"