/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tm-constrs.h"
#include "sel-sched.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */

  number_of_ia64_frame_regs
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */
  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);
static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx);
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void);
static void ia64_hpux_init_libfuncs (void);
static void ia64_sysv4_init_libfuncs (void);
static void ia64_vms_init_libfuncs (void);
static void ia64_soft_fp_init_libfuncs (void);
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode);
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *);

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
                                                     enum machine_mode,
                                                     int *, const_tree, int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute },
#endif
  { "version_id", 1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL, 0, 0, false, false, false, NULL }
};
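
/* For illustration (hypothetical declaration, not taken from this file):
   the "model" attribute handled above is normally attached to an object
   declaration to request the small address area, e.g.

     int foo __attribute__ ((model ("small")));

   ia64_handle_model_attribute below accepts "small" / "__small__" as the
   argument.  */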
322 /* Initialize the GCC target structure. */
323 #undef TARGET_ATTRIBUTE_TABLE
324 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
326 #undef TARGET_INIT_BUILTINS
327 #define TARGET_INIT_BUILTINS ia64_init_builtins
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
332 #undef TARGET_ASM_BYTE_OP
333 #define TARGET_ASM_BYTE_OP "\tdata1\t"
334 #undef TARGET_ASM_ALIGNED_HI_OP
335 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
338 #undef TARGET_ASM_ALIGNED_DI_OP
339 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
340 #undef TARGET_ASM_UNALIGNED_HI_OP
341 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
342 #undef TARGET_ASM_UNALIGNED_SI_OP
343 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
344 #undef TARGET_ASM_UNALIGNED_DI_OP
345 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
346 #undef TARGET_ASM_INTEGER
347 #define TARGET_ASM_INTEGER ia64_assemble_integer
349 #undef TARGET_ASM_FUNCTION_PROLOGUE
350 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
351 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
352 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
353 #undef TARGET_ASM_FUNCTION_EPILOGUE
354 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
356 #undef TARGET_IN_SMALL_DATA_P
357 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
359 #undef TARGET_SCHED_ADJUST_COST_2
360 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
365 #undef TARGET_SCHED_INIT
366 #define TARGET_SCHED_INIT ia64_sched_init
367 #undef TARGET_SCHED_FINISH
368 #define TARGET_SCHED_FINISH ia64_sched_finish
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
371 #undef TARGET_SCHED_FINISH_GLOBAL
372 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER ia64_sched_reorder
375 #undef TARGET_SCHED_REORDER2
376 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
378 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
379 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
384 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
385 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
386 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
387 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
389 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
390 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
391 ia64_first_cycle_multipass_dfa_lookahead_guard
393 #undef TARGET_SCHED_DFA_NEW_CYCLE
394 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
396 #undef TARGET_SCHED_H_I_D_EXTENDED
397 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
399 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
400 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
402 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
403 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
405 #undef TARGET_SCHED_SET_SCHED_CONTEXT
406 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
408 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
409 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
411 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
412 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
414 #undef TARGET_SCHED_SET_SCHED_FLAGS
415 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
417 #undef TARGET_SCHED_GET_INSN_SPEC_DS
418 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
420 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
421 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
423 #undef TARGET_SCHED_SPECULATE_INSN
424 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
426 #undef TARGET_SCHED_NEEDS_BLOCK_P
427 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
429 #undef TARGET_SCHED_GEN_SPEC_CHECK
430 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
432 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
433 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
434 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
436 #undef TARGET_SCHED_SKIP_RTX_P
437 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
439 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
440 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
444 #undef TARGET_ASM_OUTPUT_MI_THUNK
445 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
446 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
447 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
449 #undef TARGET_ASM_FILE_START
450 #define TARGET_ASM_FILE_START ia64_file_start
452 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
453 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
455 #undef TARGET_RTX_COSTS
456 #define TARGET_RTX_COSTS ia64_rtx_costs
457 #undef TARGET_ADDRESS_COST
458 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
460 #undef TARGET_UNSPEC_MAY_TRAP_P
461 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
469 #undef TARGET_SECTION_TYPE_FLAGS
470 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
473 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
474 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
477 #undef TARGET_PROMOTE_FUNCTION_MODE
478 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
480 /* ??? Investigate. */
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
486 #undef TARGET_STRUCT_VALUE_RTX
487 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
488 #undef TARGET_RETURN_IN_MEMORY
489 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
490 #undef TARGET_SETUP_INCOMING_VARARGS
491 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
492 #undef TARGET_STRICT_ARGUMENT_NAMING
493 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
497 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
498 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
500 #undef TARGET_UNWIND_EMIT
501 #define TARGET_UNWIND_EMIT process_for_unwind_directive
503 #undef TARGET_SCALAR_MODE_SUPPORTED_P
504 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
508 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
509 in an order different from the specified program order. */
510 #undef TARGET_RELAXED_ORDERING
511 #define TARGET_RELAXED_ORDERING true
513 #undef TARGET_DEFAULT_TARGET_FLAGS
514 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
515 #undef TARGET_HANDLE_OPTION
516 #define TARGET_HANDLE_OPTION ia64_handle_option
518 #undef TARGET_CANNOT_FORCE_CONST_MEM
519 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
521 #undef TARGET_MANGLE_TYPE
522 #define TARGET_MANGLE_TYPE ia64_mangle_type
524 #undef TARGET_INVALID_CONVERSION
525 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
526 #undef TARGET_INVALID_UNARY_OP
527 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
528 #undef TARGET_INVALID_BINARY_OP
529 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
531 #undef TARGET_C_MODE_FOR_SUFFIX
532 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
534 #undef TARGET_CAN_ELIMINATE
535 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
537 #undef TARGET_TRAMPOLINE_INIT
538 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
540 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
541 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
565 /* Retrieve the address area that has been chosen for the given decl. */
567 static ia64_addr_area
568 ia64_get_addr_area (tree decl
)
572 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
578 id
= TREE_VALUE (TREE_VALUE (model_attr
));
579 if (id
== small_ident1
|| id
== small_ident2
)
580 return ADDR_AREA_SMALL
;
582 return ADDR_AREA_NORMAL
;
586 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
,
587 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
589 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
591 tree arg
, decl
= *node
;
594 arg
= TREE_VALUE (args
);
595 if (arg
== small_ident1
|| arg
== small_ident2
)
597 addr_area
= ADDR_AREA_SMALL
;
601 warning (OPT_Wattributes
, "invalid argument of %qE attribute",
603 *no_add_attrs
= true;
606 switch (TREE_CODE (decl
))
609 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
611 && !TREE_STATIC (decl
))
613 error_at (DECL_SOURCE_LOCATION (decl
),
614 "an address area attribute cannot be specified for "
616 *no_add_attrs
= true;
618 area
= ia64_get_addr_area (decl
);
619 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
621 error ("address area of %q+D conflicts with previous "
622 "declaration", decl
);
623 *no_add_attrs
= true;
628 error_at (DECL_SOURCE_LOCATION (decl
),
629 "address area attribute cannot be specified for "
631 *no_add_attrs
= true;
635 warning (OPT_Wattributes
, "%qE attribute ignored",
637 *no_add_attrs
= true;
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)

      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;

  DECL_SECTION_NAME (decl) = val;

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,

  tree attr = DECL_ATTRIBUTES (decl);

  /* As common_object attribute set DECL_SECTION_NAME check it before
     looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);

      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
               size, align / BITS_PER_UNIT);

      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
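
/* For illustration (hypothetical output, exact spelling depends on
   COMMON_ASM_OP and the other host macros): for a 16-byte common_object
   variable aligned to 64 bits, the COMMON_ASM_OP branch above would emit
   something along the lines of

     .comm  foo,16,8

   since ALIGN is given in bits and divided by BITS_PER_UNIT.  */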
710 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
713 ia64_vms_elf_asm_named_section (const char *name
, unsigned int flags
,
716 if (!(flags
& SECTION_VMS_OVERLAY
))
718 default_elf_asm_named_section (name
, flags
, decl
);
721 if (flags
!= (SECTION_VMS_OVERLAY
| SECTION_WRITE
))
724 if (flags
& SECTION_DECLARED
)
726 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
730 fprintf (asm_out_file
, "\t.section\t%s,\"awgO\"\n", name
);
734 ia64_encode_addr_area (tree decl
, rtx symbol
)
738 flags
= SYMBOL_REF_FLAGS (symbol
);
739 switch (ia64_get_addr_area (decl
))
741 case ADDR_AREA_NORMAL
: break;
742 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
743 default: gcc_unreachable ();
745 SYMBOL_REF_FLAGS (symbol
) = flags
;
749 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
751 default_encode_section_info (decl
, rtl
, first
);
753 /* Careful not to prod global register variables. */
754 if (TREE_CODE (decl
) == VAR_DECL
755 && GET_CODE (DECL_RTL (decl
)) == MEM
756 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
757 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
758 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
761 /* Return 1 if the operands of a move are ok. */
764 ia64_move_ok (rtx dst
, rtx src
)
766 /* If we're under init_recog_no_volatile, we'll not be able to use
767 memory_operand. So check the code directly and don't worry about
768 the validity of the underlying address, which should have been
769 checked elsewhere anyway. */
770 if (GET_CODE (dst
) != MEM
)
772 if (GET_CODE (src
) == MEM
)
774 if (register_operand (src
, VOIDmode
))
777 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
778 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
779 return src
== const0_rtx
;
781 return satisfies_constraint_G (src
);
784 /* Return 1 if the operands are ok for a floating point load pair. */
787 ia64_load_pair_ok (rtx dst
, rtx src
)
789 if (GET_CODE (dst
) != REG
|| !FP_REGNO_P (REGNO (dst
)))
791 if (GET_CODE (src
) != MEM
|| MEM_VOLATILE_P (src
))
793 switch (GET_CODE (XEXP (src
, 0)))
802 rtx adjust
= XEXP (XEXP (XEXP (src
, 0), 1), 1);
804 if (GET_CODE (adjust
) != CONST_INT
805 || INTVAL (adjust
) != GET_MODE_SIZE (GET_MODE (src
)))
816 addp4_optimize_ok (rtx op1
, rtx op2
)
818 return (basereg_operand (op1
, GET_MODE(op1
)) !=
819 basereg_operand (op2
, GET_MODE(op2
)));
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

ia64_depz_field_mask (rtx rop, rtx rshift)
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
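
/* For illustration: with rop = 0x1f8 and rshift = 3, the shifted mask is
   0x3f, a solid block of six 1 bits, so the function returns
   exact_log2 (0x40) = 6, i.e. a 6-bit deposit field.  */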
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
857 /* Return true if X is a constant that is valid for some immediate
858 field in an instruction. */
861 ia64_legitimate_constant_p (rtx x
)
863 switch (GET_CODE (x
))
870 if (GET_MODE (x
) == VOIDmode
|| GET_MODE (x
) == SFmode
871 || GET_MODE (x
) == DFmode
)
873 return satisfies_constraint_G (x
);
877 /* ??? Short term workaround for PR 28490. We must make the code here
878 match the code in ia64_expand_move and move_operand, even though they
879 are both technically wrong. */
880 if (tls_symbolic_operand_type (x
) == 0)
882 HOST_WIDE_INT addend
= 0;
885 if (GET_CODE (op
) == CONST
886 && GET_CODE (XEXP (op
, 0)) == PLUS
887 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
889 addend
= INTVAL (XEXP (XEXP (op
, 0), 1));
890 op
= XEXP (XEXP (op
, 0), 0);
893 if (any_offset_symbol_operand (op
, GET_MODE (op
))
894 || function_operand (op
, GET_MODE (op
)))
896 if (aligned_offset_symbol_operand (op
, GET_MODE (op
)))
897 return (addend
& 0x3fff) == 0;
904 enum machine_mode mode
= GET_MODE (x
);
906 if (mode
== V2SFmode
)
907 return satisfies_constraint_Y (x
);
909 return (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
910 && GET_MODE_SIZE (mode
) <= 8);
918 /* Don't allow TLS addresses to get spilled to memory. */
921 ia64_cannot_force_const_mem (rtx x
)
923 if (GET_MODE (x
) == RFmode
)
925 return tls_symbolic_operand_type (x
) != 0;
/* Expand a symbolic constant load.  */

ia64_expand_load_address (rtx dest, rtx src)
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (small_addr_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));

      HOST_WIDE_INT addend = 0;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
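
          /* For illustration: the line above sign-extends the low 14 bits
             of HI (the addl immediate range).  With hi = 0x12345 the low
             bits are 0x2345, so lo = (0x2345 ^ 0x2000) - 0x2000 = -0x1cbb,
             and the remaining high part 0x12345 - lo = 0x14000 is a
             multiple of 0x4000.  The value is illustrative only.  */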
          src = plus_constant (XEXP (XEXP (src, 0), 0), hi);

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
996 static GTY(()) rtx gen_tls_tga
;
998 gen_tls_get_addr (void)
1001 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
1005 static GTY(()) rtx thread_pointer_rtx
;
1007 gen_thread_pointer (void)
1009 if (!thread_pointer_rtx
)
1010 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1011 return thread_pointer_rtx
;
1015 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
,
1016 rtx orig_op1
, HOST_WIDE_INT addend
)
1018 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
1020 HOST_WIDE_INT addend_lo
, addend_hi
;
1024 case TLS_MODEL_GLOBAL_DYNAMIC
:
1027 tga_op1
= gen_reg_rtx (Pmode
);
1028 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1030 tga_op2
= gen_reg_rtx (Pmode
);
1031 emit_insn (gen_load_dtprel (tga_op2
, op1
));
1033 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1034 LCT_CONST
, Pmode
, 2, tga_op1
,
1035 Pmode
, tga_op2
, Pmode
);
1037 insns
= get_insns ();
1040 if (GET_MODE (op0
) != Pmode
)
1042 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
1045 case TLS_MODEL_LOCAL_DYNAMIC
:
1046 /* ??? This isn't the completely proper way to do local-dynamic
1047 If the call to __tls_get_addr is used only by a single symbol,
1048 then we should (somehow) move the dtprel to the second arg
1049 to avoid the extra add. */
1052 tga_op1
= gen_reg_rtx (Pmode
);
1053 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1055 tga_op2
= const0_rtx
;
1057 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1058 LCT_CONST
, Pmode
, 2, tga_op1
,
1059 Pmode
, tga_op2
, Pmode
);
1061 insns
= get_insns ();
1064 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1066 tmp
= gen_reg_rtx (Pmode
);
1067 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
1069 if (!register_operand (op0
, Pmode
))
1070 op0
= gen_reg_rtx (Pmode
);
1073 emit_insn (gen_load_dtprel (op0
, op1
));
1074 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
1077 emit_insn (gen_add_dtprel (op0
, op1
, tmp
));
1080 case TLS_MODEL_INITIAL_EXEC
:
1081 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1082 addend_hi
= addend
- addend_lo
;
1084 op1
= plus_constant (op1
, addend_hi
);
1087 tmp
= gen_reg_rtx (Pmode
);
1088 emit_insn (gen_load_tprel (tmp
, op1
));
1090 if (!register_operand (op0
, Pmode
))
1091 op0
= gen_reg_rtx (Pmode
);
1092 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
1095 case TLS_MODEL_LOCAL_EXEC
:
1096 if (!register_operand (op0
, Pmode
))
1097 op0
= gen_reg_rtx (Pmode
);
1103 emit_insn (gen_load_tprel (op0
, op1
));
1104 emit_insn (gen_adddi3 (op0
, op0
, gen_thread_pointer ()));
1107 emit_insn (gen_add_tprel (op0
, op1
, gen_thread_pointer ()));
1115 op0
= expand_simple_binop (Pmode
, PLUS
, op0
, GEN_INT (addend
),
1116 orig_op0
, 1, OPTAB_DIRECT
);
1117 if (orig_op0
== op0
)
1119 if (GET_MODE (orig_op0
) == Pmode
)
1121 return gen_lowpart (GET_MODE (orig_op0
), op0
);
1125 ia64_expand_move (rtx op0
, rtx op1
)
1127 enum machine_mode mode
= GET_MODE (op0
);
1129 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
1130 op1
= force_reg (mode
, op1
);
1132 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
1134 HOST_WIDE_INT addend
= 0;
1135 enum tls_model tls_kind
;
1138 if (GET_CODE (op1
) == CONST
1139 && GET_CODE (XEXP (op1
, 0)) == PLUS
1140 && GET_CODE (XEXP (XEXP (op1
, 0), 1)) == CONST_INT
)
1142 addend
= INTVAL (XEXP (XEXP (op1
, 0), 1));
1143 sym
= XEXP (XEXP (op1
, 0), 0);
1146 tls_kind
= tls_symbolic_operand_type (sym
);
1148 return ia64_expand_tls_address (tls_kind
, op0
, sym
, op1
, addend
);
1150 if (any_offset_symbol_operand (sym
, mode
))
1152 else if (aligned_offset_symbol_operand (sym
, mode
))
1154 HOST_WIDE_INT addend_lo
, addend_hi
;
1156 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1157 addend_hi
= addend
- addend_lo
;
1161 op1
= plus_constant (sym
, addend_hi
);
1170 if (reload_completed
)
1172 /* We really should have taken care of this offset earlier. */
1173 gcc_assert (addend
== 0);
1174 if (ia64_expand_load_address (op0
, op1
))
1180 rtx subtarget
= !can_create_pseudo_p () ? op0
: gen_reg_rtx (mode
);
1182 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, op1
));
1184 op1
= expand_simple_binop (mode
, PLUS
, subtarget
,
1185 GEN_INT (addend
), op0
, 1, OPTAB_DIRECT
);
1194 /* Split a move from OP1 to OP0 conditional on COND. */
1197 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1199 rtx insn
, first
= get_last_insn ();
1201 emit_move_insn (op0
, op1
);
1203 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1205 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1209 /* Split a post-reload TImode or TFmode reference into two DImode
1210 components. This is made extra difficult by the fact that we do
1211 not get any scratch registers to work with, because reload cannot
1212 be prevented from giving us a scratch that overlaps the register
1213 pair involved. So instead, when addressing memory, we tweak the
1214 pointer register up and back down with POST_INCs. Or up and not
1215 back down when we can get away with it.
1217 REVERSED is true when the loads must be done in reversed order
1218 (high word first) for correctness. DEAD is true when the pointer
1219 dies with the second insn we generate and therefore the second
1220 address must not carry a postmodify.
1222 May return an insn which is to be emitted after the moves. */
1225 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
1229 switch (GET_CODE (in
))
1232 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
1233 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1238 /* Cannot occur reversed. */
1239 gcc_assert (!reversed
);
1241 if (GET_MODE (in
) != TFmode
)
1242 split_double (in
, &out
[0], &out
[1]);
1244 /* split_double does not understand how to split a TFmode
1245 quantity into a pair of DImode constants. */
1248 unsigned HOST_WIDE_INT p
[2];
1249 long l
[4]; /* TFmode is 128 bits */
1251 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
1252 real_to_target (l
, &r
, TFmode
);
1254 if (FLOAT_WORDS_BIG_ENDIAN
)
1256 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
1257 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
1261 p
[0] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
1262 p
[1] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
1264 out
[0] = GEN_INT (p
[0]);
1265 out
[1] = GEN_INT (p
[1]);
1271 rtx base
= XEXP (in
, 0);
1274 switch (GET_CODE (base
))
1279 out
[0] = adjust_automodify_address
1280 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1281 out
[1] = adjust_automodify_address
1282 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
1286 /* Reversal requires a pre-increment, which can only
1287 be done as a separate insn. */
1288 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
1289 out
[0] = adjust_automodify_address
1290 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
1291 out
[1] = adjust_address (in
, DImode
, 0);
1296 gcc_assert (!reversed
&& !dead
);
1298 /* Just do the increment in two steps. */
1299 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
1300 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1304 gcc_assert (!reversed
&& !dead
);
1306 /* Add 8, subtract 24. */
1307 base
= XEXP (base
, 0);
1308 out
[0] = adjust_automodify_address
1309 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1310 out
[1] = adjust_automodify_address
1312 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
1317 gcc_assert (!reversed
&& !dead
);
1319 /* Extract and adjust the modification. This case is
1320 trickier than the others, because we might have an
1321 index register, or we might have a combined offset that
1322 doesn't fit a signed 9-bit displacement field. We can
1323 assume the incoming expression is already legitimate. */
1324 offset
= XEXP (base
, 1);
1325 base
= XEXP (base
, 0);
1327 out
[0] = adjust_automodify_address
1328 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1330 if (GET_CODE (XEXP (offset
, 1)) == REG
)
1332 /* Can't adjust the postmodify to match. Emit the
1333 original, then a separate addition insn. */
1334 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1335 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
1339 gcc_assert (GET_CODE (XEXP (offset
, 1)) == CONST_INT
);
1340 if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
1342 /* Again the postmodify cannot be made to match,
1343 but in this case it's more efficient to get rid
1344 of the postmodify entirely and fix up with an
1346 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
1348 (base
, base
, GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
1352 /* Combined offset still fits in the displacement field.
1353 (We cannot overflow it at the high end.) */
1354 out
[1] = adjust_automodify_address
1355 (in
, DImode
, gen_rtx_POST_MODIFY
1356 (Pmode
, base
, gen_rtx_PLUS
1358 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

ia64_split_tmode_move (rtx operands[])
  rtx in[2], out[2], insn;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))

  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
  if (GET_CODE (EXP) == MEM \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
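
/* Descriptive note: REG_INC notes record that a register is modified by an
   auto-increment/decrement address, so passes that track register lifetimes
   see the side effect of the POST_INC, POST_DEC and POST_MODIFY addresses
   created when the TImode/TFmode access is split into two DImode moves.  */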
  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  emit_insn (fixup[0]);

  emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
1436 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1437 through memory plus an extra GR scratch register. Except that you can
1438 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1439 SECONDARY_RELOAD_CLASS, but not both.
1441 We got into problems in the first place by allowing a construct like
1442 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1443 This solution attempts to prevent this situation from occurring. When
1444 we see something like the above, we spill the inner register to memory. */
1447 spill_xfmode_rfmode_operand (rtx in
, int force
, enum machine_mode mode
)
1449 if (GET_CODE (in
) == SUBREG
1450 && GET_MODE (SUBREG_REG (in
)) == TImode
1451 && GET_CODE (SUBREG_REG (in
)) == REG
)
1453 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1454 emit_move_insn (memt
, SUBREG_REG (in
));
1455 return adjust_address (memt
, mode
, 0);
1457 else if (force
&& GET_CODE (in
) == REG
)
1459 rtx memx
= assign_stack_temp (mode
, 16, 0);
1460 emit_move_insn (memx
, in
);
1467 /* Expand the movxf or movrf pattern (MODE says which) with the given
1468 OPERANDS, returning true if the pattern should then invoke
1472 ia64_expand_movxf_movrf (enum machine_mode mode
, rtx operands
[])
1474 rtx op0
= operands
[0];
1476 if (GET_CODE (op0
) == SUBREG
)
1477 op0
= SUBREG_REG (op0
);
1479 /* We must support XFmode loads into general registers for stdarg/vararg,
1480 unprototyped calls, and a rare case where a long double is passed as
1481 an argument after a float HFA fills the FP registers. We split them into
1482 DImode loads for convenience. We also need to support XFmode stores
1483 for the last case. This case does not happen for stdarg/vararg routines,
1484 because we do a block store to memory of unnamed arguments. */
1486 if (GET_CODE (op0
) == REG
&& GR_REGNO_P (REGNO (op0
)))
1490 /* We're hoping to transform everything that deals with XFmode
1491 quantities and GR registers early in the compiler. */
1492 gcc_assert (can_create_pseudo_p ());
1494 /* Struct to register can just use TImode instead. */
1495 if ((GET_CODE (operands
[1]) == SUBREG
1496 && GET_MODE (SUBREG_REG (operands
[1])) == TImode
)
1497 || (GET_CODE (operands
[1]) == REG
1498 && GR_REGNO_P (REGNO (operands
[1]))))
1500 rtx op1
= operands
[1];
1502 if (GET_CODE (op1
) == SUBREG
)
1503 op1
= SUBREG_REG (op1
);
1505 op1
= gen_rtx_REG (TImode
, REGNO (op1
));
1507 emit_move_insn (gen_rtx_REG (TImode
, REGNO (op0
)), op1
);
1511 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
1513 /* Don't word-swap when reading in the constant. */
1514 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
)),
1515 operand_subword (operands
[1], WORDS_BIG_ENDIAN
,
1517 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
) + 1),
1518 operand_subword (operands
[1], !WORDS_BIG_ENDIAN
,
1523 /* If the quantity is in a register not known to be GR, spill it. */
1524 if (register_operand (operands
[1], mode
))
1525 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 1, mode
);
1527 gcc_assert (GET_CODE (operands
[1]) == MEM
);
1529 /* Don't word-swap when reading in the value. */
1530 out
[0] = gen_rtx_REG (DImode
, REGNO (op0
));
1531 out
[1] = gen_rtx_REG (DImode
, REGNO (op0
) + 1);
1533 emit_move_insn (out
[0], adjust_address (operands
[1], DImode
, 0));
1534 emit_move_insn (out
[1], adjust_address (operands
[1], DImode
, 8));
1538 if (GET_CODE (operands
[1]) == REG
&& GR_REGNO_P (REGNO (operands
[1])))
1540 /* We're hoping to transform everything that deals with XFmode
1541 quantities and GR registers early in the compiler. */
1542 gcc_assert (can_create_pseudo_p ());
1544 /* Op0 can't be a GR_REG here, as that case is handled above.
1545 If op0 is a register, then we spill op1, so that we now have a
1546 MEM operand. This requires creating an XFmode subreg of a TImode reg
1547 to force the spill. */
1548 if (register_operand (operands
[0], mode
))
1550 rtx op1
= gen_rtx_REG (TImode
, REGNO (operands
[1]));
1551 op1
= gen_rtx_SUBREG (mode
, op1
, 0);
1552 operands
[1] = spill_xfmode_rfmode_operand (op1
, 0, mode
);
1559 gcc_assert (GET_CODE (operands
[0]) == MEM
);
1561 /* Don't word-swap when writing out the value. */
1562 in
[0] = gen_rtx_REG (DImode
, REGNO (operands
[1]));
1563 in
[1] = gen_rtx_REG (DImode
, REGNO (operands
[1]) + 1);
1565 emit_move_insn (adjust_address (operands
[0], DImode
, 0), in
[0]);
1566 emit_move_insn (adjust_address (operands
[0], DImode
, 8), in
[1]);
1571 if (!reload_in_progress
&& !reload_completed
)
1573 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 0, mode
);
1575 if (GET_MODE (op0
) == TImode
&& GET_CODE (op0
) == REG
)
1577 rtx memt
, memx
, in
= operands
[1];
1578 if (CONSTANT_P (in
))
1579 in
= validize_mem (force_const_mem (mode
, in
));
1580 if (GET_CODE (in
) == MEM
)
1581 memt
= adjust_address (in
, TImode
, 0);
1584 memt
= assign_stack_temp (TImode
, 16, 0);
1585 memx
= adjust_address (memt
, mode
, 0);
1586 emit_move_insn (memx
, in
);
1588 emit_move_insn (op0
, memt
);
1592 if (!ia64_move_ok (operands
[0], operands
[1]))
1593 operands
[1] = force_reg (mode
, operands
[1]);
1599 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600 with the expression that holds the compare result (in VOIDmode). */
1602 static GTY(()) rtx cmptf_libfunc
;
1605 ia64_expand_compare (rtx
*expr
, rtx
*op0
, rtx
*op1
)
1607 enum rtx_code code
= GET_CODE (*expr
);
1610 /* If we have a BImode input, then we already have a compare result, and
1611 do not need to emit another comparison. */
1612 if (GET_MODE (*op0
) == BImode
)
1614 gcc_assert ((code
== NE
|| code
== EQ
) && *op1
== const0_rtx
);
1617 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1618 magic number as its third argument, that indicates what to do.
1619 The return value is an integer to be compared against zero. */
1620 else if (TARGET_HPUX
&& GET_MODE (*op0
) == TFmode
)
1623 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1630 enum rtx_code ncode
;
1633 gcc_assert (cmptf_libfunc
&& GET_MODE (*op1
) == TFmode
);
1636 /* 1 = equal, 0 = not equal. Equality operators do
1637 not raise FP_INVALID when given an SNaN operand. */
1638 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1639 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1640 /* isunordered() from C99. */
1641 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1642 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1643 /* Relational operators raise FP_INVALID when given
1645 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1646 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1647 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1648 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
        /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
           Expanders for buneq etc. would have to be added to ia64.md
           for this to be useful.  */
1652 default: gcc_unreachable ();
1657 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1658 *op0
, TFmode
, *op1
, TFmode
,
1659 GEN_INT (magic
), DImode
);
1660 cmp
= gen_reg_rtx (BImode
);
1661 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1662 gen_rtx_fmt_ee (ncode
, BImode
,
1665 insns
= get_insns ();
1668 emit_libcall_block (insns
, cmp
, cmp
,
1669 gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
));
1674 cmp
= gen_reg_rtx (BImode
);
1675 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1676 gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
)));
1680 *expr
= gen_rtx_fmt_ee (code
, VOIDmode
, cmp
, const0_rtx
);
1685 /* Generate an integral vector comparison. Return true if the condition has
1686 been reversed, and so the sense of the comparison should be inverted. */
1689 ia64_expand_vecint_compare (enum rtx_code code
, enum machine_mode mode
,
1690 rtx dest
, rtx op0
, rtx op1
)
1692 bool negate
= false;
1695 /* Canonicalize the comparison to EQ, GT, GTU. */
1706 code
= reverse_condition (code
);
1712 code
= reverse_condition (code
);
1718 code
= swap_condition (code
);
1719 x
= op0
, op0
= op1
, op1
= x
;
  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */

          /* Subtract (-(INT MAX) - 1) from both operands to make
             them signed.  */
          mask = GEN_INT (0x80000000);
          mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
          mask = force_reg (mode, mask);
          t1 = gen_reg_rtx (mode);
          emit_insn (gen_subv2si3 (t1, op0, mask));
          t2 = gen_reg_rtx (mode);
          emit_insn (gen_subv2si3 (t2, op1, mask));
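
          /* For illustration: biasing both operands by 0x80000000 preserves
             the ordering of unsigned values under a signed compare.
             Comparing 0xF0000000 > 0x10000000 (true as unsigned) becomes
             0x70000000 > 0x90000000, a positive value against a negative
             one, which is also true as signed.  */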
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, op0, op1)));

          op1 = CONST0_RTX (mode);

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1778 ia64_expand_vecint_cmov (rtx operands
[])
1780 enum machine_mode mode
= GET_MODE (operands
[0]);
1781 enum rtx_code code
= GET_CODE (operands
[3]);
1785 cmp
= gen_reg_rtx (mode
);
1786 negate
= ia64_expand_vecint_compare (code
, mode
, cmp
,
1787 operands
[4], operands
[5]);
1789 ot
= operands
[1+negate
];
1790 of
= operands
[2-negate
];
1792 if (ot
== CONST0_RTX (mode
))
1794 if (of
== CONST0_RTX (mode
))
1796 emit_move_insn (operands
[0], ot
);
1800 x
= gen_rtx_NOT (mode
, cmp
);
1801 x
= gen_rtx_AND (mode
, x
, of
);
1802 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1804 else if (of
== CONST0_RTX (mode
))
1806 x
= gen_rtx_AND (mode
, cmp
, ot
);
1807 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1813 t
= gen_reg_rtx (mode
);
1814 x
= gen_rtx_AND (mode
, cmp
, operands
[1+negate
]);
1815 emit_insn (gen_rtx_SET (VOIDmode
, t
, x
));
1817 f
= gen_reg_rtx (mode
);
1818 x
= gen_rtx_NOT (mode
, cmp
);
1819 x
= gen_rtx_AND (mode
, x
, operands
[2-negate
]);
1820 emit_insn (gen_rtx_SET (VOIDmode
, f
, x
));
1822 x
= gen_rtx_IOR (mode
, t
, f
);
1823 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], x
));
1827 /* Emit an integral vector min or max operation. Return true if all done. */
1830 ia64_expand_vecint_minmax (enum rtx_code code
, enum machine_mode mode
,
1835 /* These four combinations are supported directly. */
1836 if (mode
== V8QImode
&& (code
== UMIN
|| code
== UMAX
))
1838 if (mode
== V4HImode
&& (code
== SMIN
|| code
== SMAX
))
1841 /* This combination can be implemented with only saturating subtraction. */
1842 if (mode
== V4HImode
&& code
== UMAX
)
1844 rtx x
, tmp
= gen_reg_rtx (mode
);
1846 x
= gen_rtx_US_MINUS (mode
, operands
[1], operands
[2]);
1847 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, x
));
1849 emit_insn (gen_addv4hi3 (operands
[0], tmp
, operands
[2]));
1853 /* Everything else implemented via vector comparisons. */
1854 xops
[0] = operands
[0];
1855 xops
[4] = xops
[1] = operands
[1];
1856 xops
[5] = xops
[2] = operands
[2];
1875 xops
[3] = gen_rtx_fmt_ee (code
, VOIDmode
, operands
[1], operands
[2]);
1877 ia64_expand_vecint_cmov (xops
);
1881 /* Emit an integral vector widening sum operations. */
1884 ia64_expand_widen_sum (rtx operands
[3], bool unsignedp
)
1887 enum machine_mode wmode
, mode
;
1888 rtx (*unpack_l
) (rtx
, rtx
, rtx
);
1889 rtx (*unpack_h
) (rtx
, rtx
, rtx
);
1890 rtx (*plus
) (rtx
, rtx
, rtx
);
1892 wmode
= GET_MODE (operands
[0]);
1893 mode
= GET_MODE (operands
[1]);
1898 unpack_l
= gen_unpack1_l
;
1899 unpack_h
= gen_unpack1_h
;
1900 plus
= gen_addv4hi3
;
1903 unpack_l
= gen_unpack2_l
;
1904 unpack_h
= gen_unpack2_h
;
1905 plus
= gen_addv2si3
;
1911 /* Fill in x with the sign extension of each element in op1. */
1913 x
= CONST0_RTX (mode
);
1918 x
= gen_reg_rtx (mode
);
1920 neg
= ia64_expand_vecint_compare (LT
, mode
, x
, operands
[1],
1925 l
= gen_reg_rtx (wmode
);
1926 h
= gen_reg_rtx (wmode
);
1927 s
= gen_reg_rtx (wmode
);
1929 emit_insn (unpack_l (gen_lowpart (mode
, l
), operands
[1], x
));
1930 emit_insn (unpack_h (gen_lowpart (mode
, h
), operands
[1], x
));
1931 emit_insn (plus (s
, l
, operands
[2]));
1932 emit_insn (plus (operands
[0], h
, s
));
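
/* Descriptive note: the sequence above widens each half of operands[1]
   (unpack low and high against X, which holds either zeros or the
   per-element sign mask), adds the low half to the existing accumulator
   operands[2], and then adds the high half on top, leaving the widened sum
   in operands[0].  */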
/* Emit a signed or unsigned V8QI dot product operation.  */

ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;

  /* Fill in x1 and x2 with the sign extension of each element.  */
  if (unsignedp)
    x1 = x2 = CONST0_RTX (V8QImode);
  else
      x1 = gen_reg_rtx (V8QImode);
      x2 = gen_reg_rtx (V8QImode);

      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
                                        CONST0_RTX (V8QImode));
      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
                                        CONST0_RTX (V8QImode));

  l1 = gen_reg_rtx (V4HImode);
  l2 = gen_reg_rtx (V4HImode);
  h1 = gen_reg_rtx (V4HImode);
  h2 = gen_reg_rtx (V4HImode);

  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));

  p1 = gen_reg_rtx (V2SImode);
  p2 = gen_reg_rtx (V2SImode);
  p3 = gen_reg_rtx (V2SImode);
  p4 = gen_reg_rtx (V2SImode);
  emit_insn (gen_pmpy2_r (p1, l1, l2));
  emit_insn (gen_pmpy2_l (p2, l1, l2));
  emit_insn (gen_pmpy2_r (p3, h1, h2));
  emit_insn (gen_pmpy2_l (p4, h1, h2));

  s1 = gen_reg_rtx (V2SImode);
  s2 = gen_reg_rtx (V2SImode);
  s3 = gen_reg_rtx (V2SImode);
  emit_insn (gen_addv2si3 (s1, p1, p2));
  emit_insn (gen_addv2si3 (s2, p3, p4));
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
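
/* Descriptive note: the dot product is built as a small reduction tree.
   The two V8QI inputs are widened to V4HI halves, pmpy2 produces four V2SI
   partial products, and three addv2si3 instructions fold those partial
   products together with the accumulator operands[3] into operands[0].  */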
1988 /* Emit the appropriate sequence for a call. */
1991 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
1996 addr
= XEXP (addr
, 0);
1997 addr
= convert_memory_address (DImode
, addr
);
1998 b0
= gen_rtx_REG (DImode
, R_BR (0));
2000 /* ??? Should do this for functions known to bind local too. */
2001 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2004 insn
= gen_sibcall_nogp (addr
);
2006 insn
= gen_call_nogp (addr
, b0
);
2008 insn
= gen_call_value_nogp (retval
, addr
, b0
);
2009 insn
= emit_call_insn (insn
);
2014 insn
= gen_sibcall_gp (addr
);
2016 insn
= gen_call_gp (addr
, b0
);
2018 insn
= gen_call_value_gp (retval
, addr
, b0
);
2019 insn
= emit_call_insn (insn
);
2021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
2025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
2027 if (TARGET_ABI_OPEN_VMS
)
2028 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
2029 gen_rtx_REG (DImode
, GR_REG (25)));
2033 reg_emitted (enum ia64_frame_regs r
)
2035 if (emitted_frame_related_regs
[r
] == 0)
2036 emitted_frame_related_regs
[r
] = current_frame_info
.r
[r
];
2038 gcc_assert (emitted_frame_related_regs
[r
] == current_frame_info
.r
[r
]);
2042 get_reg (enum ia64_frame_regs r
)
2045 return current_frame_info
.r
[r
];
2049 is_emitted (int regno
)
2053 for (r
= reg_fp
; r
< number_of_ia64_frame_regs
; r
++)
2054 if (emitted_frame_related_regs
[r
] == regno
)
2060 ia64_reload_gp (void)
2064 if (current_frame_info
.r
[reg_save_gp
])
2066 tmp
= gen_rtx_REG (DImode
, get_reg (reg_save_gp
));
2070 HOST_WIDE_INT offset
;
2073 offset
= (current_frame_info
.spill_cfa_off
2074 + current_frame_info
.spill_size
);
2075 if (frame_pointer_needed
)
2077 tmp
= hard_frame_pointer_rtx
;
2082 tmp
= stack_pointer_rtx
;
2083 offset
= current_frame_info
.total_size
- offset
;
2086 offset_r
= GEN_INT (offset
);
2087 if (satisfies_constraint_I (offset_r
))
2088 emit_insn (gen_adddi3 (pic_offset_table_rtx
, tmp
, offset_r
));
2091 emit_move_insn (pic_offset_table_rtx
, offset_r
);
2092 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
2093 pic_offset_table_rtx
, tmp
));
2096 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
2099 emit_move_insn (pic_offset_table_rtx
, tmp
);
2103 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
2104 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
2107 bool is_desc
= false;
2109 /* If we find we're calling through a register, then we're actually
2110 calling through a descriptor, so load up the values. */
2111 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
2116 /* ??? We are currently constrained to *not* use peep2, because
2117 we can legitimately change the global lifetime of the GP
2118 (in the form of killing where previously live). This is
2119 because a call through a descriptor doesn't use the previous
2120 value of the GP, while a direct call does, and we do not
2121 commit to either form until the split here.
2123 That said, this means that we lack precise life info for
2124 whether ADDR is dead after this call. This is not terribly
2125 important, since we can fix things up essentially for free
2126 with the POST_DEC below, but it's nice to not use it when we
2127 can immediately tell it's not necessary. */
2128 addr_dead_p
= ((noreturn_p
|| sibcall_p
2129 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
2131 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
2133 /* Load the code address into scratch_b. */
2134 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
2135 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2136 emit_move_insn (scratch_r
, tmp
);
2137 emit_move_insn (scratch_b
, scratch_r
);
2139 /* Load the GP address. If ADDR is not dead here, then we must
2140 revert the change made above via the POST_INCREMENT. */
2142 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
2145 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2146 emit_move_insn (pic_offset_table_rtx
, tmp
);
2153 insn
= gen_sibcall_nogp (addr
);
2155 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
2157 insn
= gen_call_nogp (addr
, retaddr
);
2158 emit_call_insn (insn
);
2160 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
2164 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2166 This differs from the generic code in that we know about the zero-extending
2167 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2168 also know that ld.acq+cmpxchg.rel equals a full barrier.
2170 The loop we want to generate looks like
2175 new_reg = cmp_reg op val;
2176 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2177 if (cmp_reg != old_reg)
2180 Note that we only do the plain load from memory once. Subsequent
2181 iterations use the value loaded by the compare-and-swap pattern. */
2184 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
2185 rtx old_dst
, rtx new_dst
)
2187 enum machine_mode mode
= GET_MODE (mem
);
2188 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
2189 enum insn_code icode
;
2191 /* Special case for using fetchadd. */
2192 if ((mode
== SImode
|| mode
== DImode
)
2193 && (code
== PLUS
|| code
== MINUS
)
2194 && fetchadd_operand (val
, mode
))
2197 val
= GEN_INT (-INTVAL (val
));
2200 old_dst
= gen_reg_rtx (mode
);
2202 emit_insn (gen_memory_barrier ());
2205 icode
= CODE_FOR_fetchadd_acq_si
;
2207 icode
= CODE_FOR_fetchadd_acq_di
;
2208 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
2212 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
2214 if (new_reg
!= new_dst
)
2215 emit_move_insn (new_dst
, new_reg
);
2220 /* Because of the volatile mem read, we get an ld.acq, which is the
2221 front half of the full barrier. The end half is the cmpxchg.rel. */
2222 gcc_assert (MEM_VOLATILE_P (mem
));
2224 old_reg
= gen_reg_rtx (DImode
);
2225 cmp_reg
= gen_reg_rtx (DImode
);
2226 label
= gen_label_rtx ();
2230 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
2231 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
2234 emit_move_insn (cmp_reg
, mem
);
2238 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
2239 emit_move_insn (old_reg
, cmp_reg
);
2240 emit_move_insn (ar_ccv
, cmp_reg
);
2243 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
2248 new_reg
= expand_simple_binop (DImode
, AND
, new_reg
, val
, NULL_RTX
,
2249 true, OPTAB_DIRECT
);
2250 new_reg
= expand_simple_unop (DImode
, code
, new_reg
, NULL_RTX
, true);
2253 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
2254 true, OPTAB_DIRECT
);
2257 new_reg
= gen_lowpart (mode
, new_reg
);
2259 emit_move_insn (new_dst
, new_reg
);
2263 case QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
2264 case HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
2265 case SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
2266 case DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
2271 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
2273 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, NE
, NULL
, DImode
, true, label
);
2276 /* Begin the assembly file. */
2279 ia64_file_start (void)
2281 /* Variable tracking should be run after all optimizations which change order
2282 of insns. It also needs a valid CFG. This can't be done in
2283 ia64_override_options, because flag_var_tracking is finalized after
2285 ia64_flag_var_tracking
= flag_var_tracking
;
2286 flag_var_tracking
= 0;
2288 default_file_start ();
2289 emit_safe_across_calls ();
2293 emit_safe_across_calls (void)
2295 unsigned int rs
, re
;
2302 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
2306 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
2310 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
2314 fputc (',', asm_out_file
);
2316 fprintf (asm_out_file
, "p%u", rs
);
2318 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
2322 fputc ('\n', asm_out_file
);
2325 /* Globalize a declaration. */
2328 ia64_globalize_decl_name (FILE * stream
, tree decl
)
2330 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2331 tree version_attr
= lookup_attribute ("version_id", DECL_ATTRIBUTES (decl
));
2334 tree v
= TREE_VALUE (TREE_VALUE (version_attr
));
2335 const char *p
= TREE_STRING_POINTER (v
);
2336 fprintf (stream
, "\t.alias %s#, \"%s{%s}\"\n", name
, name
, p
);
2338 targetm
.asm_out
.globalize_label (stream
, name
);
2339 if (TREE_CODE (decl
) == FUNCTION_DECL
)
2340 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
2343 /* Helper function for ia64_compute_frame_size: find an appropriate general
2344 register to spill some special register to. SPECIAL_SPILL_MASK contains
2345 bits in GR0 to GR31 that have already been allocated by this routine.
2346 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2349 find_gr_spill (enum ia64_frame_regs r
, int try_locals
)
2353 if (emitted_frame_related_regs
[r
] != 0)
2355 regno
= emitted_frame_related_regs
[r
];
2356 if (regno
>= LOC_REG (0) && regno
< LOC_REG (80 - frame_pointer_needed
)
2357 && current_frame_info
.n_local_regs
< regno
- LOC_REG (0) + 1)
2358 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2359 else if (current_function_is_leaf
2360 && regno
>= GR_REG (1) && regno
<= GR_REG (31))
2361 current_frame_info
.gr_used_mask
|= 1 << regno
;
2366 /* If this is a leaf function, first try an otherwise unused
2367 call-clobbered register. */
2368 if (current_function_is_leaf
)
2370 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2371 if (! df_regs_ever_live_p (regno
)
2372 && call_used_regs
[regno
]
2373 && ! fixed_regs
[regno
]
2374 && ! global_regs
[regno
]
2375 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0
2376 && ! is_emitted (regno
))
2378 current_frame_info
.gr_used_mask
|= 1 << regno
;
2385 regno
= current_frame_info
.n_local_regs
;
2386 /* If there is a frame pointer, then we can't use loc79, because
2387 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2388 reg_name switching code in ia64_expand_prologue. */
2389 while (regno
< (80 - frame_pointer_needed
))
2390 if (! is_emitted (LOC_REG (regno
++)))
2392 current_frame_info
.n_local_regs
= regno
;
2393 return LOC_REG (regno
- 1);
2397 /* Failed to find a general register to spill to. Must use stack. */
2401 /* In order to make for nice schedules, we try to allocate every temporary
2402 to a different register. We must of course stay away from call-saved,
2403 fixed, and global registers. We must also stay away from registers
2404 allocated in current_frame_info.gr_used_mask, since those include regs
2405 used all through the prologue.
2407 Any register allocated here must be used immediately. The idea is to
2408 aid scheduling, not to solve data flow problems. */
2410 static int last_scratch_gr_reg
;
2413 next_scratch_gr_reg (void)
2417 for (i
= 0; i
< 32; ++i
)
2419 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
2420 if (call_used_regs
[regno
]
2421 && ! fixed_regs
[regno
]
2422 && ! global_regs
[regno
]
2423 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
2425 last_scratch_gr_reg
= regno
;
2430 /* There must be _something_ available. */
2434 /* Helper function for ia64_compute_frame_size, called through
2435 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2438 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
2440 unsigned int regno
= REGNO (reg
);
2443 unsigned int i
, n
= hard_regno_nregs
[regno
][GET_MODE (reg
)];
2444 for (i
= 0; i
< n
; ++i
)
2445 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
2450 /* Returns the number of bytes offset between the frame pointer and the stack
2451 pointer for the current function. SIZE is the number of bytes of space
2452 needed for local variables. */
2455 ia64_compute_frame_size (HOST_WIDE_INT size
)
2457 HOST_WIDE_INT total_size
;
2458 HOST_WIDE_INT spill_size
= 0;
2459 HOST_WIDE_INT extra_spill_size
= 0;
2460 HOST_WIDE_INT pretend_args_size
;
2463 int spilled_gr_p
= 0;
2464 int spilled_fr_p
= 0;
2470 if (current_frame_info
.initialized
)
2473 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2474 CLEAR_HARD_REG_SET (mask
);
2476 /* Don't allocate scratches to the return register. */
2477 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2479 /* Don't allocate scratches to the EH scratch registers. */
2480 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2481 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2482 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2483 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2485 /* Find the size of the register stack frame. We have only 80 local
2486 registers, because we reserve 8 for the inputs and 8 for the
2489 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490 since we'll be adjusting that down later. */
2491 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2492 for (; regno
>= LOC_REG (0); regno
--)
2493 if (df_regs_ever_live_p (regno
) && !is_emitted (regno
))
2495 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2497 /* For functions marked with the syscall_linkage attribute, we must mark
2498 all eight input registers as in use, so that locals aren't visible to
2501 if (cfun
->machine
->n_varargs
> 0
2502 || lookup_attribute ("syscall_linkage",
2503 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2504 current_frame_info
.n_input_regs
= 8;
2507 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2508 if (df_regs_ever_live_p (regno
))
2510 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2513 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2514 if (df_regs_ever_live_p (regno
))
2516 i
= regno
- OUT_REG (0) + 1;
2518 #ifndef PROFILE_HOOK
2519 /* When -p profiling, we need one output register for the mcount argument.
2520 Likewise for -a profiling for the bb_init_func argument. For -ax
2521 profiling, we need two output registers for the two bb_init_trace_func
2526 current_frame_info
.n_output_regs
= i
;
2528 /* ??? No rotating register support yet. */
2529 current_frame_info
.n_rotate_regs
= 0;
2531 /* Discover which registers need spilling, and how much room that
2532 will take. Begin with floating point and general registers,
2533 which will always wind up on the stack. */
2535 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2536 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2538 SET_HARD_REG_BIT (mask
, regno
);
2544 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2545 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2547 SET_HARD_REG_BIT (mask
, regno
);
2553 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2554 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2556 SET_HARD_REG_BIT (mask
, regno
);
2561 /* Now come all special registers that might get saved in other
2562 general registers. */
2564 if (frame_pointer_needed
)
2566 current_frame_info
.r
[reg_fp
] = find_gr_spill (reg_fp
, 1);
2567 /* If we did not get a register, then we take LOC79. This is guaranteed
2568 to be free, even if regs_ever_live is already set, because this is
2569 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2570 as we don't count loc79 above. */
2571 if (current_frame_info
.r
[reg_fp
] == 0)
2573 current_frame_info
.r
[reg_fp
] = LOC_REG (79);
2574 current_frame_info
.n_local_regs
= LOC_REG (79) - LOC_REG (0) + 1;
2578 if (! current_function_is_leaf
)
2580 /* Emit a save of BR0 if we call other functions. Do this even
2581 if this function doesn't return, as EH depends on this to be
2582 able to unwind the stack. */
2583 SET_HARD_REG_BIT (mask
, BR_REG (0));
2585 current_frame_info
.r
[reg_save_b0
] = find_gr_spill (reg_save_b0
, 1);
2586 if (current_frame_info
.r
[reg_save_b0
] == 0)
2588 extra_spill_size
+= 8;
2592 /* Similarly for ar.pfs. */
2593 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2594 current_frame_info
.r
[reg_save_ar_pfs
] = find_gr_spill (reg_save_ar_pfs
, 1);
2595 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2597 extra_spill_size
+= 8;
2601 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2602 registers are clobbered, so we fall back to the stack. */
2603 current_frame_info
.r
[reg_save_gp
]
2604 = (cfun
->calls_setjmp
? 0 : find_gr_spill (reg_save_gp
, 1));
2605 if (current_frame_info
.r
[reg_save_gp
] == 0)
2607 SET_HARD_REG_BIT (mask
, GR_REG (1));
2614 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs
[BR_REG (0)])
2616 SET_HARD_REG_BIT (mask
, BR_REG (0));
2617 extra_spill_size
+= 8;
2621 if (df_regs_ever_live_p (AR_PFS_REGNUM
))
2623 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2624 current_frame_info
.r
[reg_save_ar_pfs
]
2625 = find_gr_spill (reg_save_ar_pfs
, 1);
2626 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2628 extra_spill_size
+= 8;
2634 /* Unwind descriptor hackery: things are most efficient if we allocate
2635 consecutive GR save registers for RP, PFS, FP in that order. However,
2636 it is absolutely critical that FP get the only hard register that's
2637 guaranteed to be free, so we allocated it first. If all three did
2638 happen to be allocated hard regs, and are consecutive, rearrange them
2639 into the preferred order now.
2641 If we have already emitted code for any of those registers,
2642 then it's already too late to change. */
2643 min_regno
= MIN (current_frame_info
.r
[reg_fp
],
2644 MIN (current_frame_info
.r
[reg_save_b0
],
2645 current_frame_info
.r
[reg_save_ar_pfs
]));
2646 max_regno
= MAX (current_frame_info
.r
[reg_fp
],
2647 MAX (current_frame_info
.r
[reg_save_b0
],
2648 current_frame_info
.r
[reg_save_ar_pfs
]));
2650 && min_regno
+ 2 == max_regno
2651 && (current_frame_info
.r
[reg_fp
] == min_regno
+ 1
2652 || current_frame_info
.r
[reg_save_b0
] == min_regno
+ 1
2653 || current_frame_info
.r
[reg_save_ar_pfs
] == min_regno
+ 1)
2654 && (emitted_frame_related_regs
[reg_save_b0
] == 0
2655 || emitted_frame_related_regs
[reg_save_b0
] == min_regno
)
2656 && (emitted_frame_related_regs
[reg_save_ar_pfs
] == 0
2657 || emitted_frame_related_regs
[reg_save_ar_pfs
] == min_regno
+ 1)
2658 && (emitted_frame_related_regs
[reg_fp
] == 0
2659 || emitted_frame_related_regs
[reg_fp
] == min_regno
+ 2))
2661 current_frame_info
.r
[reg_save_b0
] = min_regno
;
2662 current_frame_info
.r
[reg_save_ar_pfs
] = min_regno
+ 1;
2663 current_frame_info
.r
[reg_fp
] = min_regno
+ 2;
2666 /* See if we need to store the predicate register block. */
2667 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2668 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2670 if (regno
<= PR_REG (63))
2672 SET_HARD_REG_BIT (mask
, PR_REG (0));
2673 current_frame_info
.r
[reg_save_pr
] = find_gr_spill (reg_save_pr
, 1);
2674 if (current_frame_info
.r
[reg_save_pr
] == 0)
2676 extra_spill_size
+= 8;
2680 /* ??? Mark them all as used so that register renaming and such
2681 are free to use them. */
2682 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2683 df_set_regs_ever_live (regno
, true);
2686 /* If we're forced to use st8.spill, we're forced to save and restore
2687 ar.unat as well. The check for existing liveness allows inline asm
2688 to touch ar.unat. */
2689 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2690 || df_regs_ever_live_p (AR_UNAT_REGNUM
))
2692 df_set_regs_ever_live (AR_UNAT_REGNUM
, true);
2693 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2694 current_frame_info
.r
[reg_save_ar_unat
]
2695 = find_gr_spill (reg_save_ar_unat
, spill_size
== 0);
2696 if (current_frame_info
.r
[reg_save_ar_unat
] == 0)
2698 extra_spill_size
+= 8;
2703 if (df_regs_ever_live_p (AR_LC_REGNUM
))
2705 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2706 current_frame_info
.r
[reg_save_ar_lc
]
2707 = find_gr_spill (reg_save_ar_lc
, spill_size
== 0);
2708 if (current_frame_info
.r
[reg_save_ar_lc
] == 0)
2710 extra_spill_size
+= 8;
2715 /* If we have an odd number of words of pretend arguments written to
2716 the stack, then the FR save area will be unaligned. We round the
2717 size of this area up to keep things 16 byte aligned. */
2719 pretend_args_size
= IA64_STACK_ALIGN (crtl
->args
.pretend_args_size
);
2721 pretend_args_size
= crtl
->args
.pretend_args_size
;
2723 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2724 + crtl
->outgoing_args_size
);
2725 total_size
= IA64_STACK_ALIGN (total_size
);
2727 /* We always use the 16-byte scratch area provided by the caller, but
2728 if we are a leaf function, there's no one to which we need to provide
2730 if (current_function_is_leaf
)
2731 total_size
= MAX (0, total_size
- 16);
2733 current_frame_info
.total_size
= total_size
;
2734 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2735 current_frame_info
.spill_size
= spill_size
;
2736 current_frame_info
.extra_spill_size
= extra_spill_size
;
2737 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2738 current_frame_info
.n_spilled
= n_spilled
;
2739 current_frame_info
.initialized
= reload_completed
;
2742 /* Worker function for TARGET_CAN_ELIMINATE. */
2745 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED
, const int to
)
2747 return (to
== BR_REG (0) ? current_function_is_leaf
: true);
2750 /* Compute the initial difference between the specified pair of registers. */
2753 ia64_initial_elimination_offset (int from
, int to
)
2755 HOST_WIDE_INT offset
;
2757 ia64_compute_frame_size (get_frame_size ());
2760 case FRAME_POINTER_REGNUM
:
2763 case HARD_FRAME_POINTER_REGNUM
:
2764 if (current_function_is_leaf
)
2765 offset
= -current_frame_info
.total_size
;
2767 offset
= -(current_frame_info
.total_size
2768 - crtl
->outgoing_args_size
- 16);
2771 case STACK_POINTER_REGNUM
:
2772 if (current_function_is_leaf
)
2775 offset
= 16 + crtl
->outgoing_args_size
;
2783 case ARG_POINTER_REGNUM
:
2784 /* Arguments start above the 16 byte save area, unless stdarg
2785 in which case we store through the 16 byte save area. */
2788 case HARD_FRAME_POINTER_REGNUM
:
2789 offset
= 16 - crtl
->args
.pretend_args_size
;
2792 case STACK_POINTER_REGNUM
:
2793 offset
= (current_frame_info
.total_size
2794 + 16 - crtl
->args
.pretend_args_size
);
2809 /* If there are more than a trivial number of register spills, we use
2810 two interleaved iterators so that we can get two memory references
2813 In order to simplify things in the prologue and epilogue expanders,
2814 we use helper functions to fix up the memory references after the
2815 fact with the appropriate offsets to a POST_MODIFY memory mode.
2816 The following data structure tracks the state of the two iterators
2817 while insns are being emitted. */
2819 struct spill_fill_data
2821 rtx init_after
; /* point at which to emit initializations */
2822 rtx init_reg
[2]; /* initial base register */
2823 rtx iter_reg
[2]; /* the iterator registers */
2824 rtx
*prev_addr
[2]; /* address of last memory use */
2825 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
2826 HOST_WIDE_INT prev_off
[2]; /* last offset */
2827 int n_iter
; /* number of iterators in use */
2828 int next_iter
; /* next iterator to use */
2829 unsigned int save_gr_used_mask
;
2832 static struct spill_fill_data spill_fill_data
;
2835 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
2839 spill_fill_data
.init_after
= get_last_insn ();
2840 spill_fill_data
.init_reg
[0] = init_reg
;
2841 spill_fill_data
.init_reg
[1] = init_reg
;
2842 spill_fill_data
.prev_addr
[0] = NULL
;
2843 spill_fill_data
.prev_addr
[1] = NULL
;
2844 spill_fill_data
.prev_insn
[0] = NULL
;
2845 spill_fill_data
.prev_insn
[1] = NULL
;
2846 spill_fill_data
.prev_off
[0] = cfa_off
;
2847 spill_fill_data
.prev_off
[1] = cfa_off
;
2848 spill_fill_data
.next_iter
= 0;
2849 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
2851 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
2852 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
2854 int regno
= next_scratch_gr_reg ();
2855 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
2856 current_frame_info
.gr_used_mask
|= 1 << regno
;
2861 finish_spill_pointers (void)
2863 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
2867 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
2869 int iter
= spill_fill_data
.next_iter
;
2870 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
2871 rtx disp_rtx
= GEN_INT (disp
);
2874 if (spill_fill_data
.prev_addr
[iter
])
2876 if (satisfies_constraint_N (disp_rtx
))
2878 *spill_fill_data
.prev_addr
[iter
]
2879 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
2880 gen_rtx_PLUS (DImode
,
2881 spill_fill_data
.iter_reg
[iter
],
2883 add_reg_note (spill_fill_data
.prev_insn
[iter
],
2884 REG_INC
, spill_fill_data
.iter_reg
[iter
]);
2888 /* ??? Could use register post_modify for loads. */
2889 if (!satisfies_constraint_I (disp_rtx
))
2891 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2892 emit_move_insn (tmp
, disp_rtx
);
2895 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2896 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
2899 /* Micro-optimization: if we've created a frame pointer, it's at
2900 CFA 0, which may allow the real iterator to be initialized lower,
2901 slightly increasing parallelism. Also, if there are few saves
2902 it may eliminate the iterator entirely. */
2904 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
2905 && frame_pointer_needed
)
2907 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
2908 set_mem_alias_set (mem
, get_varargs_alias_set ());
2916 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
2917 spill_fill_data
.init_reg
[iter
]);
2922 if (!satisfies_constraint_I (disp_rtx
))
2924 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2925 emit_move_insn (tmp
, disp_rtx
);
2929 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2930 spill_fill_data
.init_reg
[iter
],
2937 /* Careful for being the first insn in a sequence. */
2938 if (spill_fill_data
.init_after
)
2939 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
2942 rtx first
= get_insns ();
2944 insn
= emit_insn_before (seq
, first
);
2946 insn
= emit_insn (seq
);
2948 spill_fill_data
.init_after
= insn
;
2951 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
2953 /* ??? Not all of the spills are for varargs, but some of them are.
2954 The rest of the spills belong in an alias set of their own. But
2955 it doesn't actually hurt to include them here. */
2956 set_mem_alias_set (mem
, get_varargs_alias_set ());
2958 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
2959 spill_fill_data
.prev_off
[iter
] = cfa_off
;
2961 if (++iter
>= spill_fill_data
.n_iter
)
2963 spill_fill_data
.next_iter
= iter
;
2969 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
2972 int iter
= spill_fill_data
.next_iter
;
2975 mem
= spill_restore_mem (reg
, cfa_off
);
2976 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
2977 spill_fill_data
.prev_insn
[iter
] = insn
;
2984 RTX_FRAME_RELATED_P (insn
) = 1;
2986 /* Don't even pretend that the unwind code can intuit its way
2987 through a pair of interleaved post_modify iterators. Just
2988 provide the correct answer. */
2990 if (frame_pointer_needed
)
2992 base
= hard_frame_pointer_rtx
;
2997 base
= stack_pointer_rtx
;
2998 off
= current_frame_info
.total_size
- cfa_off
;
3001 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3002 gen_rtx_SET (VOIDmode
,
3003 gen_rtx_MEM (GET_MODE (reg
),
3004 plus_constant (base
, off
)),
3010 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
3012 int iter
= spill_fill_data
.next_iter
;
3015 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
3016 GEN_INT (cfa_off
)));
3017 spill_fill_data
.prev_insn
[iter
] = insn
;
3020 /* Wrapper functions that discards the CONST_INT spill offset. These
3021 exist so that we can give gr_spill/gr_fill the offset they need and
3022 use a consistent function interface. */
3025 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3027 return gen_movdi (dest
, src
);
3031 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3033 return gen_fr_spill (dest
, src
);
3037 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3039 return gen_fr_restore (dest
, src
);
3042 /* Called after register allocation to add any instructions needed for the
3043 prologue. Using a prologue insn is favored compared to putting all of the
3044 instructions in output_function_prologue(), since it allows the scheduler
3045 to intermix instructions with the saves of the caller saved registers. In
3046 some cases, it might be necessary to emit a barrier instruction as the last
3047 insn to prevent such scheduling.
3049 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050 so that the debug info generation code can handle them properly.
3052 The register save area is layed out like so:
3054 [ varargs spill area ]
3055 [ fr register spill area ]
3056 [ br register spill area ]
3057 [ ar register spill area ]
3058 [ pr register spill area ]
3059 [ gr register spill area ] */
3061 /* ??? Get inefficient code when the frame size is larger than can fit in an
3062 adds instruction. */
3065 ia64_expand_prologue (void)
3067 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
3068 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
3071 ia64_compute_frame_size (get_frame_size ());
3072 last_scratch_gr_reg
= 15;
3076 fprintf (dump_file
, "ia64 frame related registers "
3077 "recorded in current_frame_info.r[]:\n");
3078 #define PRINTREG(a) if (current_frame_info.r[a]) \
3079 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3081 PRINTREG(reg_save_b0
);
3082 PRINTREG(reg_save_pr
);
3083 PRINTREG(reg_save_ar_pfs
);
3084 PRINTREG(reg_save_ar_unat
);
3085 PRINTREG(reg_save_ar_lc
);
3086 PRINTREG(reg_save_gp
);
3090 /* If there is no epilogue, then we don't need some prologue insns.
3091 We need to avoid emitting the dead prologue insns, because flow
3092 will complain about them. */
3098 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
3099 if ((e
->flags
& EDGE_FAKE
) == 0
3100 && (e
->flags
& EDGE_FALLTHRU
) != 0)
3102 epilogue_p
= (e
!= NULL
);
3107 /* Set the local, input, and output register names. We need to do this
3108 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109 half. If we use in/loc/out register names, then we get assembler errors
3110 in crtn.S because there is no alloc insn or regstk directive in there. */
3111 if (! TARGET_REG_NAMES
)
3113 int inputs
= current_frame_info
.n_input_regs
;
3114 int locals
= current_frame_info
.n_local_regs
;
3115 int outputs
= current_frame_info
.n_output_regs
;
3117 for (i
= 0; i
< inputs
; i
++)
3118 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
3119 for (i
= 0; i
< locals
; i
++)
3120 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
3121 for (i
= 0; i
< outputs
; i
++)
3122 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
3125 /* Set the frame pointer register name. The regnum is logically loc79,
3126 but of course we'll not have allocated that many locals. Rather than
3127 worrying about renumbering the existing rtxs, we adjust the name. */
3128 /* ??? This code means that we can never use one local register when
3129 there is a frame pointer. loc79 gets wasted in this case, as it is
3130 renamed to a register that will never be used. See also the try_locals
3131 code in find_gr_spill. */
3132 if (current_frame_info
.r
[reg_fp
])
3134 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3135 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3136 = reg_names
[current_frame_info
.r
[reg_fp
]];
3137 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3140 /* We don't need an alloc instruction if we've used no outputs or locals. */
3141 if (current_frame_info
.n_local_regs
== 0
3142 && current_frame_info
.n_output_regs
== 0
3143 && current_frame_info
.n_input_regs
<= crtl
->args
.info
.int_regs
3144 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3146 /* If there is no alloc, but there are input registers used, then we
3147 need a .regstk directive. */
3148 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
3149 ar_pfs_save_reg
= NULL_RTX
;
3153 current_frame_info
.need_regstk
= 0;
3155 if (current_frame_info
.r
[reg_save_ar_pfs
])
3157 regno
= current_frame_info
.r
[reg_save_ar_pfs
];
3158 reg_emitted (reg_save_ar_pfs
);
3161 regno
= next_scratch_gr_reg ();
3162 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
3164 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
3165 GEN_INT (current_frame_info
.n_input_regs
),
3166 GEN_INT (current_frame_info
.n_local_regs
),
3167 GEN_INT (current_frame_info
.n_output_regs
),
3168 GEN_INT (current_frame_info
.n_rotate_regs
)));
3169 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.r
[reg_save_ar_pfs
] != 0);
3172 /* Set up frame pointer, stack pointer, and spill iterators. */
3174 n_varargs
= cfun
->machine
->n_varargs
;
3175 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
3176 stack_pointer_rtx
, 0);
3178 if (frame_pointer_needed
)
3180 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3181 RTX_FRAME_RELATED_P (insn
) = 1;
3184 if (current_frame_info
.total_size
!= 0)
3186 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
3189 if (satisfies_constraint_I (frame_size_rtx
))
3190 offset
= frame_size_rtx
;
3193 regno
= next_scratch_gr_reg ();
3194 offset
= gen_rtx_REG (DImode
, regno
);
3195 emit_move_insn (offset
, frame_size_rtx
);
3198 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
3199 stack_pointer_rtx
, offset
));
3201 if (! frame_pointer_needed
)
3203 RTX_FRAME_RELATED_P (insn
) = 1;
3204 if (GET_CODE (offset
) != CONST_INT
)
3205 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3206 gen_rtx_SET (VOIDmode
,
3208 gen_rtx_PLUS (DImode
,
3213 /* ??? At this point we must generate a magic insn that appears to
3214 modify the stack pointer, the frame pointer, and all spill
3215 iterators. This would allow the most scheduling freedom. For
3216 now, just hard stop. */
3217 emit_insn (gen_blockage ());
3220 /* Must copy out ar.unat before doing any integer spills. */
3221 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3223 if (current_frame_info
.r
[reg_save_ar_unat
])
3226 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3227 reg_emitted (reg_save_ar_unat
);
3231 alt_regno
= next_scratch_gr_reg ();
3232 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3233 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3236 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3237 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
3238 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.r
[reg_save_ar_unat
] != 0);
3240 /* Even if we're not going to generate an epilogue, we still
3241 need to save the register so that EH works. */
3242 if (! epilogue_p
&& current_frame_info
.r
[reg_save_ar_unat
])
3243 emit_insn (gen_prologue_use (ar_unat_save_reg
));
3246 ar_unat_save_reg
= NULL_RTX
;
3248 /* Spill all varargs registers. Do this before spilling any GR registers,
3249 since we want the UNAT bits for the GR registers to override the UNAT
3250 bits from varargs, which we don't care about. */
3253 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
3255 reg
= gen_rtx_REG (DImode
, regno
);
3256 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
3259 /* Locate the bottom of the register save area. */
3260 cfa_off
= (current_frame_info
.spill_cfa_off
3261 + current_frame_info
.spill_size
3262 + current_frame_info
.extra_spill_size
);
3264 /* Save the predicate register block either in a register or in memory. */
3265 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3267 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3268 if (current_frame_info
.r
[reg_save_pr
] != 0)
3270 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3271 reg_emitted (reg_save_pr
);
3272 insn
= emit_move_insn (alt_reg
, reg
);
3274 /* ??? Denote pr spill/fill by a DImode move that modifies all
3275 64 hard registers. */
3276 RTX_FRAME_RELATED_P (insn
) = 1;
3277 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3278 gen_rtx_SET (VOIDmode
, alt_reg
, reg
));
3280 /* Even if we're not going to generate an epilogue, we still
3281 need to save the register so that EH works. */
3283 emit_insn (gen_prologue_use (alt_reg
));
3287 alt_regno
= next_scratch_gr_reg ();
3288 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3289 insn
= emit_move_insn (alt_reg
, reg
);
3290 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3295 /* Handle AR regs in numerical order. All of them get special handling. */
3296 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
3297 && current_frame_info
.r
[reg_save_ar_unat
] == 0)
3299 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3300 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
3304 /* The alloc insn already copied ar.pfs into a general register. The
3305 only thing we have to do now is copy that register to a stack slot
3306 if we'd not allocated a local register for the job. */
3307 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
3308 && current_frame_info
.r
[reg_save_ar_pfs
] == 0)
3310 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3311 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
3315 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3317 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3318 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3320 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3321 reg_emitted (reg_save_ar_lc
);
3322 insn
= emit_move_insn (alt_reg
, reg
);
3323 RTX_FRAME_RELATED_P (insn
) = 1;
3325 /* Even if we're not going to generate an epilogue, we still
3326 need to save the register so that EH works. */
3328 emit_insn (gen_prologue_use (alt_reg
));
3332 alt_regno
= next_scratch_gr_reg ();
3333 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3334 emit_move_insn (alt_reg
, reg
);
3335 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3340 /* Save the return pointer. */
3341 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3343 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3344 if (current_frame_info
.r
[reg_save_b0
] != 0)
3346 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3347 reg_emitted (reg_save_b0
);
3348 insn
= emit_move_insn (alt_reg
, reg
);
3349 RTX_FRAME_RELATED_P (insn
) = 1;
3351 /* Even if we're not going to generate an epilogue, we still
3352 need to save the register so that EH works. */
3354 emit_insn (gen_prologue_use (alt_reg
));
3358 alt_regno
= next_scratch_gr_reg ();
3359 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3360 emit_move_insn (alt_reg
, reg
);
3361 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3366 if (current_frame_info
.r
[reg_save_gp
])
3368 reg_emitted (reg_save_gp
);
3369 insn
= emit_move_insn (gen_rtx_REG (DImode
,
3370 current_frame_info
.r
[reg_save_gp
]),
3371 pic_offset_table_rtx
);
3374 /* We should now be at the base of the gr/br/fr spill area. */
3375 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3376 + current_frame_info
.spill_size
));
3378 /* Spill all general registers. */
3379 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3380 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3382 reg
= gen_rtx_REG (DImode
, regno
);
3383 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
3387 /* Spill the rest of the BR registers. */
3388 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3389 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3391 alt_regno
= next_scratch_gr_reg ();
3392 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3393 reg
= gen_rtx_REG (DImode
, regno
);
3394 emit_move_insn (alt_reg
, reg
);
3395 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3399 /* Align the frame and spill all FR registers. */
3400 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3401 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3403 gcc_assert (!(cfa_off
& 15));
3404 reg
= gen_rtx_REG (XFmode
, regno
);
3405 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
3409 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3411 finish_spill_pointers ();
3414 /* Called after register allocation to add any instructions needed for the
3415 epilogue. Using an epilogue insn is favored compared to putting all of the
3416 instructions in output_function_prologue(), since it allows the scheduler
3417 to intermix instructions with the saves of the caller saved registers. In
3418 some cases, it might be necessary to emit a barrier instruction as the last
3419 insn to prevent such scheduling. */
3422 ia64_expand_epilogue (int sibcall_p
)
3424 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
3425 int regno
, alt_regno
, cfa_off
;
3427 ia64_compute_frame_size (get_frame_size ());
3429 /* If there is a frame pointer, then we use it instead of the stack
3430 pointer, so that the stack pointer does not need to be valid when
3431 the epilogue starts. See EXIT_IGNORE_STACK. */
3432 if (frame_pointer_needed
)
3433 setup_spill_pointers (current_frame_info
.n_spilled
,
3434 hard_frame_pointer_rtx
, 0);
3436 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3437 current_frame_info
.total_size
);
3439 if (current_frame_info
.total_size
!= 0)
3441 /* ??? At this point we must generate a magic insn that appears to
3442 modify the spill iterators and the frame pointer. This would
3443 allow the most scheduling freedom. For now, just hard stop. */
3444 emit_insn (gen_blockage ());
3447 /* Locate the bottom of the register save area. */
3448 cfa_off
= (current_frame_info
.spill_cfa_off
3449 + current_frame_info
.spill_size
3450 + current_frame_info
.extra_spill_size
);
3452 /* Restore the predicate registers. */
3453 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3455 if (current_frame_info
.r
[reg_save_pr
] != 0)
3457 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3458 reg_emitted (reg_save_pr
);
3462 alt_regno
= next_scratch_gr_reg ();
3463 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3464 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3467 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3468 emit_move_insn (reg
, alt_reg
);
3471 /* Restore the application registers. */
3473 /* Load the saved unat from the stack, but do not restore it until
3474 after the GRs have been restored. */
3475 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3477 if (current_frame_info
.r
[reg_save_ar_unat
] != 0)
3480 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3481 reg_emitted (reg_save_ar_unat
);
3485 alt_regno
= next_scratch_gr_reg ();
3486 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3487 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3488 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3493 ar_unat_save_reg
= NULL_RTX
;
3495 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0)
3497 reg_emitted (reg_save_ar_pfs
);
3498 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_pfs
]);
3499 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3500 emit_move_insn (reg
, alt_reg
);
3502 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3504 alt_regno
= next_scratch_gr_reg ();
3505 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3506 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3508 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3509 emit_move_insn (reg
, alt_reg
);
3512 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3514 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3516 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3517 reg_emitted (reg_save_ar_lc
);
3521 alt_regno
= next_scratch_gr_reg ();
3522 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3523 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3526 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3527 emit_move_insn (reg
, alt_reg
);
3530 /* Restore the return pointer. */
3531 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3533 if (current_frame_info
.r
[reg_save_b0
] != 0)
3535 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3536 reg_emitted (reg_save_b0
);
3540 alt_regno
= next_scratch_gr_reg ();
3541 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3542 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3545 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3546 emit_move_insn (reg
, alt_reg
);
3549 /* We should now be at the base of the gr/br/fr spill area. */
3550 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3551 + current_frame_info
.spill_size
));
3553 /* The GP may be stored on the stack in the prologue, but it's
3554 never restored in the epilogue. Skip the stack slot. */
3555 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
3558 /* Restore all general registers. */
3559 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
3560 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3562 reg
= gen_rtx_REG (DImode
, regno
);
3563 do_restore (gen_gr_restore
, reg
, cfa_off
);
3567 /* Restore the branch registers. */
3568 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3569 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3571 alt_regno
= next_scratch_gr_reg ();
3572 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3573 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3575 reg
= gen_rtx_REG (DImode
, regno
);
3576 emit_move_insn (reg
, alt_reg
);
3579 /* Restore floating point registers. */
3580 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3581 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3583 gcc_assert (!(cfa_off
& 15));
3584 reg
= gen_rtx_REG (XFmode
, regno
);
3585 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
3589 /* Restore ar.unat for real. */
3590 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3592 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3593 emit_move_insn (reg
, ar_unat_save_reg
);
3596 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3598 finish_spill_pointers ();
3600 if (current_frame_info
.total_size
3601 || cfun
->machine
->ia64_eh_epilogue_sp
3602 || frame_pointer_needed
)
3604 /* ??? At this point we must generate a magic insn that appears to
3605 modify the spill iterators, the stack pointer, and the frame
3606 pointer. This would allow the most scheduling freedom. For now,
3608 emit_insn (gen_blockage ());
3611 if (cfun
->machine
->ia64_eh_epilogue_sp
)
3612 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
3613 else if (frame_pointer_needed
)
3615 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
3616 RTX_FRAME_RELATED_P (insn
) = 1;
3618 else if (current_frame_info
.total_size
)
3620 rtx offset
, frame_size_rtx
;
3622 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
3623 if (satisfies_constraint_I (frame_size_rtx
))
3624 offset
= frame_size_rtx
;
3627 regno
= next_scratch_gr_reg ();
3628 offset
= gen_rtx_REG (DImode
, regno
);
3629 emit_move_insn (offset
, frame_size_rtx
);
3632 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
3635 RTX_FRAME_RELATED_P (insn
) = 1;
3636 if (GET_CODE (offset
) != CONST_INT
)
3637 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3638 gen_rtx_SET (VOIDmode
,
3640 gen_rtx_PLUS (DImode
,
3645 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
3646 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
3649 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
3652 int fp
= GR_REG (2);
3653 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3654 first available call clobbered register. If there was a frame_pointer
3655 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3656 so we have to make sure we're using the string "r2" when emitting
3657 the register name for the assembler. */
3658 if (current_frame_info
.r
[reg_fp
]
3659 && current_frame_info
.r
[reg_fp
] == GR_REG (2))
3660 fp
= HARD_FRAME_POINTER_REGNUM
;
3662 /* We must emit an alloc to force the input registers to become output
3663 registers. Otherwise, if the callee tries to pass its parameters
3664 through to another call without an intervening alloc, then these
3666 /* ??? We don't need to preserve all input registers. We only need to
3667 preserve those input registers used as arguments to the sibling call.
3668 It is unclear how to compute that number here. */
3669 if (current_frame_info
.n_input_regs
!= 0)
3671 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
3672 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
3673 const0_rtx
, const0_rtx
,
3674 n_inputs
, const0_rtx
));
3675 RTX_FRAME_RELATED_P (insn
) = 1;
3680 /* Return 1 if br.ret can do all the work required to return from a
3684 ia64_direct_return (void)
3686 if (reload_completed
&& ! frame_pointer_needed
)
3688 ia64_compute_frame_size (get_frame_size ());
3690 return (current_frame_info
.total_size
== 0
3691 && current_frame_info
.n_spilled
== 0
3692 && current_frame_info
.r
[reg_save_b0
] == 0
3693 && current_frame_info
.r
[reg_save_pr
] == 0
3694 && current_frame_info
.r
[reg_save_ar_pfs
] == 0
3695 && current_frame_info
.r
[reg_save_ar_unat
] == 0
3696 && current_frame_info
.r
[reg_save_ar_lc
] == 0);
3701 /* Return the magic cookie that we use to hold the return address
3702 during early compilation. */
3705 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
3709 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
3712 /* Split this value after reload, now that we know where the return
3713 address is saved. */
3716 ia64_split_return_addr_rtx (rtx dest
)
3720 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3722 if (current_frame_info
.r
[reg_save_b0
] != 0)
3724 src
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3725 reg_emitted (reg_save_b0
);
3733 /* Compute offset from CFA for BR0. */
3734 /* ??? Must be kept in sync with ia64_expand_prologue. */
3735 off
= (current_frame_info
.spill_cfa_off
3736 + current_frame_info
.spill_size
);
3737 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3738 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3741 /* Convert CFA offset to a register based offset. */
3742 if (frame_pointer_needed
)
3743 src
= hard_frame_pointer_rtx
;
3746 src
= stack_pointer_rtx
;
3747 off
+= current_frame_info
.total_size
;
3750 /* Load address into scratch register. */
3751 off_r
= GEN_INT (off
);
3752 if (satisfies_constraint_I (off_r
))
3753 emit_insn (gen_adddi3 (dest
, src
, off_r
));
3756 emit_move_insn (dest
, off_r
);
3757 emit_insn (gen_adddi3 (dest
, src
, dest
));
3760 src
= gen_rtx_MEM (Pmode
, dest
);
3764 src
= gen_rtx_REG (DImode
, BR_REG (0));
3766 emit_move_insn (dest
, src
);
3770 ia64_hard_regno_rename_ok (int from
, int to
)
3772 /* Don't clobber any of the registers we reserved for the prologue. */
3775 for (r
= reg_fp
; r
<= reg_save_ar_lc
; r
++)
3776 if (to
== current_frame_info
.r
[r
]
3777 || from
== current_frame_info
.r
[r
]
3778 || to
== emitted_frame_related_regs
[r
]
3779 || from
== emitted_frame_related_regs
[r
])
3782 /* Don't use output registers outside the register frame. */
3783 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
3786 /* Retain even/oddness on predicate register pairs. */
3787 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
3788 return (from
& 1) == (to
& 1);
3793 /* Target hook for assembling integer objects. Handle word-sized
3794 aligned objects and detect the cases when @fptr is needed. */
3797 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3799 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
3800 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
3801 && GET_CODE (x
) == SYMBOL_REF
3802 && SYMBOL_REF_FUNCTION_P (x
))
3804 static const char * const directive
[2][2] = {
3805 /* 64-bit pointer */ /* 32-bit pointer */
3806 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3807 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3809 fputs (directive
[(aligned_p
!= 0)][POINTER_SIZE
== 32], asm_out_file
);
3810 output_addr_const (asm_out_file
, x
);
3811 fputs (")\n", asm_out_file
);
3814 return default_assemble_integer (x
, size
, aligned_p
);
3817 /* Emit the function prologue. */
3820 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3822 int mask
, grsave
, grsave_prev
;
3824 if (current_frame_info
.need_regstk
)
3825 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3826 current_frame_info
.n_input_regs
,
3827 current_frame_info
.n_local_regs
,
3828 current_frame_info
.n_output_regs
,
3829 current_frame_info
.n_rotate_regs
);
3831 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3834 /* Emit the .prologue directive. */
3837 grsave
= grsave_prev
= 0;
3838 if (current_frame_info
.r
[reg_save_b0
] != 0)
3841 grsave
= grsave_prev
= current_frame_info
.r
[reg_save_b0
];
3843 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0
3844 && (grsave_prev
== 0
3845 || current_frame_info
.r
[reg_save_ar_pfs
] == grsave_prev
+ 1))
3848 if (grsave_prev
== 0)
3849 grsave
= current_frame_info
.r
[reg_save_ar_pfs
];
3850 grsave_prev
= current_frame_info
.r
[reg_save_ar_pfs
];
3852 if (current_frame_info
.r
[reg_fp
] != 0
3853 && (grsave_prev
== 0
3854 || current_frame_info
.r
[reg_fp
] == grsave_prev
+ 1))
3857 if (grsave_prev
== 0)
3858 grsave
= HARD_FRAME_POINTER_REGNUM
;
3859 grsave_prev
= current_frame_info
.r
[reg_fp
];
3861 if (current_frame_info
.r
[reg_save_pr
] != 0
3862 && (grsave_prev
== 0
3863 || current_frame_info
.r
[reg_save_pr
] == grsave_prev
+ 1))
3866 if (grsave_prev
== 0)
3867 grsave
= current_frame_info
.r
[reg_save_pr
];
3870 if (mask
&& TARGET_GNU_AS
)
3871 fprintf (file
, "\t.prologue %d, %d\n", mask
,
3872 ia64_dbx_register_number (grsave
));
3874 fputs ("\t.prologue\n", file
);
3876 /* Emit a .spill directive, if necessary, to relocate the base of
3877 the register spill area. */
3878 if (current_frame_info
.spill_cfa_off
!= -16)
3879 fprintf (file
, "\t.spill %ld\n",
3880 (long) (current_frame_info
.spill_cfa_off
3881 + current_frame_info
.spill_size
));
3884 /* Emit the .body directive at the scheduled end of the prologue. */
3887 ia64_output_function_end_prologue (FILE *file
)
3889 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3892 fputs ("\t.body\n", file
);
3895 /* Emit the function epilogue. */
3898 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
3899 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3903 if (current_frame_info
.r
[reg_fp
])
3905 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3906 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3907 = reg_names
[current_frame_info
.r
[reg_fp
]];
3908 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3909 reg_emitted (reg_fp
);
3911 if (! TARGET_REG_NAMES
)
3913 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
3914 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
3915 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
3916 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
3917 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
3918 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
3921 current_frame_info
.initialized
= 0;
3925 ia64_dbx_register_number (int regno
)
3927 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3928 from its home at loc79 to something inside the register frame. We
3929 must perform the same renumbering here for the debug info. */
3930 if (current_frame_info
.r
[reg_fp
])
3932 if (regno
== HARD_FRAME_POINTER_REGNUM
)
3933 regno
= current_frame_info
.r
[reg_fp
];
3934 else if (regno
== current_frame_info
.r
[reg_fp
])
3935 regno
= HARD_FRAME_POINTER_REGNUM
;
3938 if (IN_REGNO_P (regno
))
3939 return 32 + regno
- IN_REG (0);
3940 else if (LOC_REGNO_P (regno
))
3941 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3942 else if (OUT_REGNO_P (regno
))
3943 return (32 + current_frame_info
.n_input_regs
3944 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3949 /* Implement TARGET_TRAMPOLINE_INIT.
3951 The trampoline should set the static chain pointer to value placed
3952 into the trampoline and should branch to the specified routine.
3953 To make the normal indirect-subroutine calling convention work,
3954 the trampoline must look like a function descriptor; the first
3955 word being the target address and the second being the target's
3958 We abuse the concept of a global pointer by arranging for it
3959 to point to the data we need to load. The complete trampoline
3960 has the following form:
3962 +-------------------+ \
3963 TRAMP: | __ia64_trampoline | |
3964 +-------------------+ > fake function descriptor
3966 +-------------------+ /
3967 | target descriptor |
3968 +-------------------+
3970 +-------------------+
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
          declared_ia64_trampoline = true;
          (*targetm.asm_out.globalize_label) (asm_out_file,
                                              "__ia64_trampoline");

  /* Make sure addresses are Pmode even if we are in ILP32 mode.  */
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
         in the Macro-32 compiler) and changed the semantics of the LTOFF22
         relocation against function symbols to make it identical to the
         LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
         strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));

  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
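
/* Illustrative sketch (hypothetical declaration, unused by the port): the
   trampoline laid down by the stores above consists of four 8-byte words
   (assuming an LP64 layout for illustration).  The first two form the fake
   function descriptor (entry point __ia64_trampoline, "gp" value ADDR+16),
   the third is the real target descriptor, and the fourth is the static
   chain.  */
struct example_ia64_trampoline_image
{
  unsigned long fake_entry;    /* address of __ia64_trampoline       */
  unsigned long fake_gp;       /* ADDR + 16, points at the next word */
  unsigned long target_fdesc;  /* the real target descriptor         */
  unsigned long static_chain;  /* the static chain value             */
};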
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int * pretend_size,
                             int second_time ATTRIBUTE_UNUSED)
  CUMULATIVE_ARGS next_cum = *cum;

  /* Skip the current argument.  */
  ia64_function_arg_advance (&next_cum, mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;

      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */

static enum machine_mode
hfa_element_mode (const_tree type, bool nested)
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;

  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))

    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
    case LANG_TYPE:     case FUNCTION_TYPE:

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types.  */
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
          && TYPE_MODE (type) != TCmode)
        return GET_MODE_INNER (TYPE_MODE (type));

      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
        return TYPE_MODE (type);

      return hfa_element_mode (TREE_TYPE (type), 1);

    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
          if (TREE_CODE (t) != FIELD_DECL)

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
              if (mode != element_mode)
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
              know_element_mode = 1;
              element_mode = mode;
      return element_mode;

      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
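
/* Illustrative declarations (hypothetical, unused): following the rules
   above, the first struct is a homogeneous FP aggregate with element mode
   DFmode, while the second is not because its leaves mix two floating
   point modes.  An aggregate of quad-precision (TFmode) elements would
   likewise not qualify, since TFmode is excluded.  */
struct example_hfa     { double x, y; };       /* HFA, element mode DFmode */
struct example_not_hfa { double x; float y; }; /* mixed leaves: not an HFA */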
/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

ia64_function_arg_words (tree type, enum machine_mode mode)
  if (mode == BLKmode)
    words = int_size_in_bytes (type);
  else
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
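
/* Illustrative sketch (hypothetical helper): the return statement above is
   a plain round-up.  Assuming 8-byte words, a 20-byte BLKmode aggregate
   needs (20 + 8 - 1) / 8 == 3 argument words.  */
static inline int
example_round_up_to_words (int byte_size, int word_size)
{
  return (byte_size + word_size - 1) / word_size;
}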
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
  /* No registers are skipped on VMS.  */
  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)

      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
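
/* Illustrative sketch (hypothetical helper, assuming 8-byte slots): an
   aggregate with 16-byte alignment whose first free slot is odd gets one
   slot of padding so it starts on an even slot; otherwise no slot is
   skipped.  */
static inline int
example_even_slot_padding (int next_free_slot, int align_bytes)
{
  return ((next_free_slot & 1) && align_bytes > 8) ? 1 : 0;
}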
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
                   int named, int incoming)
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* For OPEN VMS, emit the instruction setting up the argument register here,
     when we know this will be together with the other arguments setup related
     insns.  This is not the conceptually best place to do this, but this is
     the easiest as we have convenient access to cumulative args info.  */

  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
      unsigned HOST_WIDE_INT regval = cum->words;

      for (i = 0; i < 8; i++)
        regval |= ((int) cum->atypes[i]) << (i * 3 + 8);

      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)

  /* Check for and handle homogeneous FP aggregates.  */
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;

      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
          args_byte_size += hfa_size;

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
          enum machine_mode gr_mode = DImode;
          unsigned int gr_size;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg

          gr_size = GET_MODE_SIZE (gr_mode);
          if (gr_size == UNITS_PER_WORD
              || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
          else if (gr_size > UNITS_PER_WORD)
            int_regs += gr_size / UNITS_PER_WORD;
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));

  /* On OpenVMS variable argument is either in Rn or Fn.  */
  else if (TARGET_ABI_OPEN_VMS && named == 0)
      if (FLOAT_MODE_P (mode))
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
        return gen_rtx_REG (mode, basereg + cum->words);

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
      int byte_size = ((mode == BLKmode)
                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
          && byte_size < UNITS_PER_WORD
          rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
                                                       (basereg + cum->words
          return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));

      return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
         the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
        return gen_rtx_PARALLEL (mode,
                 gen_rtx_EXPR_LIST (VOIDmode,
                   gen_rtx_REG (DImode, basereg + cum->words + offset),
        return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
      /* See comment above.  */
      enum machine_mode inner_mode =
        (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (inner_mode,
                                                   (basereg + cum->words

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
/* Return number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                        tree type, bool named ATTRIBUTE_UNUSED)
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)

  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
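
/* Illustrative sketch (hypothetical helper, assuming 8 slots of 8 bytes):
   an argument that starts in slot 6 and needs 4 words gets its first 2
   words in registers, so 16 bytes are "partial"; arguments that fit, or
   that start past the last slot, yield 0.  */
static inline int
example_partial_bytes (int first_slot, int words)
{
  const int max_slots = 8, word_bytes = 8;

  if (first_slot >= max_slots)
    return 0;                   /* entirely on the stack */
  if (first_slot + words <= max_slots)
    return 0;                   /* entirely in registers */
  return (max_slots - first_slot) * word_bytes;
}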
/* Return ivms_arg_type based on machine_mode.  */

static enum ivms_arg_type
ia64_arg_type (enum machine_mode mode)

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                           tree type, int named)
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
      cum->words += words + offset;

  cum->atypes[cum->words] = ia64_arg_type (mode);
  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;

      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
          args_byte_size += hfa_size;

      cum->fp_regs = fp_regs;

  /* On OpenVMS variable argument is either in Rn or Fn.  */
  else if (TARGET_ABI_OPEN_VMS && named == 0)
      cum->int_regs = cum->words;
      cum->fp_regs = cum->words;

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
        cum->int_regs = cum->words;
        /* ??? Complex types should not reach here.  */
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
/* Arguments with alignment larger than 8 bytes start at the next even
   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */

ia64_function_arg_boundary (enum machine_mode mode, tree type)
  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
    return PARM_BOUNDARY * 2;

      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
        return PARM_BOUNDARY * 2;
      return PARM_BOUNDARY;

      if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
        return PARM_BOUNDARY * 2;
      return PARM_BOUNDARY;
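
/* Illustrative sketch (hypothetical helper): the rule above expressed in
   bits, assuming a 64-bit PARM_BOUNDARY -- anything whose alignment (or,
   for non-aggregate modes, size) exceeds one 64-bit slot gets the doubled
   boundary.  */
static inline int
example_arg_boundary_bits (int align_or_size_bits)
{
  return align_or_size_bits > 64 ? 128 : 64;
}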
/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
  /* We can't perform a sibcall if the current function has the syscall_linkage
     attribute.  */
  if (lookup_attribute ("syscall_linkage",
                        TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))

  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module unless
     TARGET_CONST_GP is set to true.  */
  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
/* Implement va_arg.  */

ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (addr);

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
      tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
                       size_int (2 * UNITS_PER_WORD - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  size_int (-2 * UNITS_PER_WORD));
      t = fold_convert (TREE_TYPE (valist), t);
      gimplify_assign (unshare_expr (valist), t, pre_p);

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
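
/* Illustrative sketch (hypothetical helper, assuming 8-byte words): the
   gimple built above rounds the va_list pointer up to the next 16-byte
   boundary, i.e. (p + 2*8 - 1) & -(2*8).  */
static inline unsigned long
example_round_va_ptr_up (unsigned long p)
{
  return (p + 15) & ~(unsigned long) 15;
}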
/* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  */

ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
      byte_size = int_size_in_bytes (valtype);

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
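
/* Illustrative sketch (hypothetical helper): the HFA test above in
   isolation -- an HFA is returned in memory only when it has more
   elements than there are FP argument registers.  */
static inline int
example_hfa_returned_in_memory (int byte_size, int hfa_size, int max_fp_slots)
{
  return byte_size / hfa_size > max_fp_slots;
}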
/* Return rtx for register that holds the function return value.  */

ia64_function_value (const_tree valtype, const_tree func)
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

      for (i = 0; offset < byte_size; i++)
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);

      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
         in the registers so that we get the bits properly aligned in
         the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
          && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
        need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
         HFA structure, and therefore doesn't go in fp registers.  But
         the middle-end will give it XFmode anyway, and XFmode values
         don't normally fit in integer registers.  So we need to smuggle
         the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
        need_parallel = true;

          bytesize = int_size_in_bytes (valtype);
          /* An empty PARALLEL is invalid here, but the return value
             doesn't matter for empty structs.  */
            return gen_rtx_REG (mode, GR_RET_FIRST);
          for (i = 0; offset < bytesize; i++)
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode,
              offset += UNITS_PER_WORD;
          return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));

      mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
                                         func ? TREE_TYPE (func) : NULL_TREE,
      return gen_rtx_REG (mode, GR_RET_FIRST);
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
  gcc_assert (size == 4 || size == 8);
  if (size == 4)
    fputs ("\tdata4.ua\t@dtprel(", file);
  else
    fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
                            rtx address ATTRIBUTE_UNUSED)
/* Print an operand to an assembler instruction.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   e    Print 64 - constant, for DImode rotates.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   G    A floating point constant.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   R    Print .s .d or nothing for a single, double or no truncation.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   X    A pair of floating point registers.
   r    Print register name, or constant 0 as r0.  HP compatibility for
   v    Print vector constant value as an 8-byte integer value.  */

ia64_print_operand (FILE * file, rtx x, int code)
      /* Handled below.  */

        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);

      switch (GET_CODE (x))
          str = GET_RTX_NAME (GET_CODE (x));

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));

      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names[FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names[FR_REG (1)];
          gcc_assert (GET_CODE (x) == REG);
          str = reg_names[REGNO (x)];

        REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
        real_to_target (val, &rv, GET_MODE (x));
        if (GET_MODE (x) == SFmode)
          fprintf (file, "0x%08lx", val[0] & 0xffffffff);
        else if (GET_MODE (x) == DFmode)
          fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
                   (WORDS_BIG_ENDIAN ? val[1] : val[0])
          output_operand_lossage ("invalid %%G mode");

      fputs (reg_names[REGNO (x) + 1], file);

        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
        fputs (reg_names[regno], file);

      if (MEM_VOLATILE_P (x))
        fputs (".acq", file);

        HOST_WIDE_INT value;

        switch (GET_CODE (XEXP (x, 0)))
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
                gcc_assert (GET_CODE (x) == REG);
                fprintf (file, ", %s", reg_names[REGNO (x)]);

            value = GET_MODE_SIZE (GET_MODE (x));

            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));

        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);

      if (MEM_VOLATILE_P (x))
        fputs (".rel", file);

      if (x == CONST0_RTX (GET_MODE (x)))
      else if (x == CONST1_RTX (GET_MODE (x)))
      else if (x == CONST2_RTX (GET_MODE (x)))
        output_operand_lossage ("invalid %%R value");

      fprintf (file, "%d", exact_log2 (INTVAL (x)));

      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);

      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
              fprintf (file, "0xffffffff");
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);

        unsigned int regno = REGNO (x);
        fprintf (file, "%s, %s", reg_names[regno], reg_names[regno + 1]);

      /* If this operand is the constant zero, write it as register zero.
         Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
      else if (GET_CODE (x) == CONST_INT)
        output_addr_const (file, x);
        output_operand_lossage ("invalid %%r value");

      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);

      /* For conditional branches, returns or calls, substitute
         sptk, dptk, dpnt, or spnt for %s.  */
      x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
          int pred_val = INTVAL (XEXP (x, 0));

          /* Guess top and bottom 10% statically predicted.  */
          if (pred_val < REG_BR_PROB_BASE / 50
              && br_prob_note_reliable_p (x))
          else if (pred_val < REG_BR_PROB_BASE / 2)
          else if (pred_val < REG_BR_PROB_BASE / 100 * 98
                   || !br_prob_note_reliable_p (x))
      else if (GET_CODE (current_output_insn) == CALL_INSN)

      fputs (which, file);

      x = current_insn_predicate;
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
          fprintf (file, "(%s) ", reg_names[regno]);

      output_operand_lossage ("ia64_print_operand: unknown code");

  switch (GET_CODE (x))
      /* This happens for the spill/restore instructions.  */

      /* ... fall through ...  */
      fputs (reg_names[REGNO (x)], file);

        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names[REGNO (addr)]);

      output_addr_const (file, x);
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
                bool speed ATTRIBUTE_UNUSED)
      *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);

      if (satisfies_constraint_I (x))
      else if (satisfies_constraint_J (x))
        *total = COSTS_N_INSNS (1);

      if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
        *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (3);

      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
         transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
        *total = COSTS_N_INSNS (10);
        *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (1);

      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
  if (from == ADDL_REGS)

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
      enum reg_class tmp = to;
      to = from, from = tmp;

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
      if (to != GR_REGS || from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
        return MEMORY_MOVE_COST (mode, to, 0);

    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on RCLASS
   to use when copying X into that class.  */

ia64_preferred_reload_class (rtx x, enum reg_class rclass)
      /* Don't allow volatile mem reloads into floating point registers.
         This is defined to force reload to choose the r/m case instead
         of the f/f case when reloading (set (reg fX) (mem/v)).  */
      if (MEM_P (x) && MEM_VOLATILE_P (x))

      /* Force all unrecognized constants into the constant pool.  */

/* This function returns the register class required for a secondary
   register when copying between one of the registers in RCLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

ia64_secondary_reload_class (enum reg_class rclass,
                             enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
         interaction.  We end up with two pseudos with overlapping lifetimes
         both of which are equiv to the same constant, and both which need
         to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
         changes depending on the path length, which means the qty_first_reg
         check in make_regs_eqv can give different answers at different times.
         At some point I'll probably need a reload_indi pattern to handle

         We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
         wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
         non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))

      /* This is needed if a pseudo used as a call_operand gets spilled to a
         stack slot.  */
      if (GET_CODE (x) == MEM)

      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))

      /* This can happen when a paradoxical subreg is an operand to the
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))

      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)

      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)

      /* ??? This happens if we cse/gcse a BImode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         crtl->has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)

      /* This can happen when we take a BImode subreg of a DImode value,
         and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
/* Implement targetm.unspec_may_trap_p hook.  */

ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
  if (GET_CODE (x) == UNSPEC)
      switch (XINT (x, 1))
        case UNSPEC_CHKACLR:
          /* These unspecs are just wrappers.  */
          return may_trap_p_1 (XVECEXP (x, 0, 0), flags);

  return default_unspec_may_trap_p (x, flags);
/* Parse the -mfixed-range= option string.  */

fix_range (const char *const_str)
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

      dash = strchr (str, '-');
          warning (0, "value of -mfixed-range must have form REG1-REG2");

      comma = strchr (dash + 1, ',');

      first = decode_reg_name (str);
          warning (0, "unknown register name: %s", str);

      last = decode_reg_name (dash + 1);
          warning (0, "unknown register name: %s", dash + 1);

          warning (0, "%s-%s is an empty range", str, dash + 1);

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;
/* Implement TARGET_HANDLE_OPTION.  */

ia64_handle_option (size_t code, const char *arg, int value)
    case OPT_mfixed_range_:

    case OPT_mtls_size_:
      if (value != 14 && value != 22 && value != 64)
        error ("bad value %<%s%> for -mtls-size= switch", arg);

        const char *name;           /* processor name or nickname.  */
        enum processor_type processor;
      const processor_alias_table[] =
          {"itanium2", PROCESSOR_ITANIUM2},
          {"mckinley", PROCESSOR_ITANIUM2},
      int const pta_size = ARRAY_SIZE (processor_alias_table);

      for (i = 0; i < pta_size; i++)
        if (!strcmp (arg, processor_alias_table[i].name))
            ia64_tune = processor_alias_table[i].processor;

      error ("bad value %<%s%> for -mtune= switch", arg);
/* Implement OVERRIDE_OPTIONS.  */

ia64_override_options (void)
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)

  if (TARGET_ABI_OPEN_VMS)

  ia64_override_options_after_change();
/* Implement targetm.override_options_after_change.  */

ia64_override_options_after_change (void)
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

      && ! sel_sched_switch_set)
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
  if (mflag_sched_control_spec == 2)
      /* Control speculation is on by default for the selective scheduler,
         but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
      /* FIXME: remove this once breaking autoinsns is implemented as
         a transformation.  */
      flag_auto_inc_dec = 0;

/* Initialize the record of emitted frame related registers.  */

void ia64_init_expanders (void)
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));

static struct machine_function *
ia64_init_machine_status (void)
  return GGC_CNEW (struct machine_function);
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
static enum attr_type ia64_safe_type (rtx);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else if (DEBUG_INSN_P (insn))
    return ITANIUM_CLASS_IGNORE;
    return ITANIUM_CLASS_UNKNOWN;

static enum attr_type
ia64_safe_type (rtx insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
    return TYPE_UNKNOWN;
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly

#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
#ifdef ENABLE_CHECKING
/* Bitmap whether a register has been written in the current insn.  */
HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
                           / HOST_BITS_PER_WIDEST_FAST_INT];

rws_insn_set (int regno)
  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
  SET_HARD_REG_BIT (rws_insn, regno);

rws_insn_test (int regno)
  return TEST_HARD_REG_BIT (rws_insn, regno);

#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

rws_insn_set (int regno)
  if (regno == REG_AR_CFM)
  else if (regno == REG_VOLATILE)

rws_insn_test (int regno)
  if (regno == REG_AR_CFM)
  if (regno == REG_VOLATILE)
#endif

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;    /* Is register being written?  */
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
};

static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx);
static int safe_group_barrier_needed (rtx);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

rws_update (int regno, struct reg_flags flags, int pred)
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
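
/* Illustrative sketch (hypothetical helper, not used by the port): the
   write_count field behaves like a tiny state machine, following the rules
   described before struct reg_write_state -- 0 means unwritten, 1 means
   written under a single qualifying predicate, 2 means (effectively)
   unconditionally written.  */
static inline int
example_next_write_count (int write_count, int predicated)
{
  if (!predicated)
    return 2;                   /* unconditional write */
  return write_count < 2 ? write_count + 1 : 2;
}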
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

rws_access_regno (int regno, struct reg_flags flags, int pred)
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
          /* The register has not been written yet.  */
          if (!in_safe_group_barrier)
            rws_update (regno, flags, pred);

          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
          else if (flags.is_or && rws_sum[regno].written_by_or)
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
          if (!in_safe_group_barrier)
            rws_update (regno, flags, pred);

          /* The register has been unconditionally written already.  We
          if (flags.is_and && rws_sum[regno].written_by_and)
          else if (flags.is_or && rws_sum[regno].written_by_or)
          if (!in_safe_group_barrier)
              rws_sum[regno].written_by_and = flags.is_and;
              rws_sum[regno].written_by_or = flags.is_or;

      if (flags.is_branch)
          /* Branches have several RAW exceptions that allow us to avoid

          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is

          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */

      if (flags.is_and && rws_sum[regno].written_by_and)
      if (flags.is_or && rws_sum[regno].written_by_or)

      switch (rws_sum[regno].write_count)
          /* The register has not been written yet.  */

          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)

          /* The register has been unconditionally written already.  We

  return need_barrier;
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

    return rws_access_regno (regno, flags, pred);

      int need_barrier = 0;

        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;

/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

update_set_flags (rtx x, struct reg_flags *pflags)
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
      /* There are four cases here:
         (1) The destination is (pc), in which case this is a branch,
         nothing here applies.
         (2) The destination is ar.lc, in which case this is a
         doloop_end_internal,
         (3) The destination is an fp register, in which case this is
         an fselect instruction.
         (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
         this is a check load.
         In all cases, nothing we do in this function applies.  */

      if (COMPARISON_P (src)
          && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
        /* Set pflags->is_fp to 1 so that we know we're dealing
           with a floating point comparison when processing the
           destination of the SET.  */

      /* Discover if this is a parallel comparison.  We only handle
         and.orcm and or.andcm at present, since we must retain a
         strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
      else if (GET_CODE (src) == IOR)
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition

set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
  int need_barrier = 0;

  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
      if (!ia64_spec_check_src_p (src))
        flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);

  if (ia64_spec_check_src_p (src))
      /* Avoid checking one register twice (in condition
         and in 'then' section) for ldc pattern.  */
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  if (GET_CODE (dst) == ZERO_EXTRACT)
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
  return need_barrier;
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;

  switch (GET_CODE (x))
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
          new_flags.is_write = 1;
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);

      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
         multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
          new_flags.is_write = 1;
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);

      /* X is a predicated instruction.  */
      cond = COND_EXEC_TEST (x);

      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
        is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
                  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      if (is_complemented)

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

      /* Clobber & use are for earlier compiler-phases only.  */

      /* We always emit stop bits for traditional asms.  We emit stop bits
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
          /* Avoid writing the register multiple times if we have multiple
             asm outputs.  This avoids a failure in rws_access_reg.  */
          if (! rws_insn_test (REG_VOLATILE))
              new_flags.is_write = 1;
              rws_access_regno (REG_VOLATILE, new_flags, pred);

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
         We cannot just fall through here since then we would be confused
         by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
         traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))

      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
          rtx pat = XVECEXP (x, 0, i);
          switch (GET_CODE (pat))
              update_set_flags (pat, &new_flags);
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred);

              need_barrier |= rtx_needs_barrier (pat, flags, pred);

      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
              if (GET_CODE (SET_SRC (pat)) != CALL)
                  new_flags.is_write = 1;
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
          else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);

      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);

      if (REGNO (x) == AR_UNAT_REGNUM)
          for (i = 0; i < 64; ++i)
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
        need_barrier = rws_access_reg (x, flags, pred);

      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);

    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);

      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);

    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);

      /* VEC_SELECT's second argument is a PARALLEL with integers that
         describe the elements selected.  On ia64, those integers are
         always constants.  Avoid walking the PARALLEL so that we don't
         get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);

      switch (XINT (x, 1))
        case UNSPEC_LTOFF_DTPMOD:
        case UNSPEC_LTOFF_DTPREL:
        case UNSPEC_LTOFF_TPREL:
        case UNSPEC_PRED_REL_MUTEX:
        case UNSPEC_PIC_CALL:
        case UNSPEC_FETCHADD_ACQ:
        case UNSPEC_BSP_VALUE:
        case UNSPEC_FLUSHRS:
        case UNSPEC_BUNDLE_SELECTOR:

        case UNSPEC_GR_SPILL:
        case UNSPEC_GR_RESTORE:
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,

        case UNSPEC_FR_SPILL:
        case UNSPEC_FR_RESTORE:
        case UNSPEC_GETF_EXP:
        case UNSPEC_SETF_EXP:
        case UNSPEC_FR_SQRT_RECIP_APPROX:
        case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
        case UNSPEC_CHKACLR:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);

        case UNSPEC_FR_RECIP_APPROX:
        case UNSPEC_COPYSIGN:
        case UNSPEC_FR_RECIP_APPROX_RES:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);

        case UNSPEC_CMPXCHG_ACQ:
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
          /* Alloc must always be the first instruction of a group.
             We force this by always returning true.  */
          /* ??? We might get better scheduling if we explicitly check for
             input/local/output register dependencies, and modify the
             scheduler so that alloc is always reordered to the start of
             the current group.  We could then eliminate all of the
             first_instruction code.  */
          rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          rws_access_regno (REG_AR_CFM, new_flags, pred);

        case UNSPECV_SET_BSP:

        case UNSPECV_BLOCKAGE:
        case UNSPECV_INSN_GROUP_BARRIER:
        case UNSPECV_PSAC_ALL:
        case UNSPECV_PSAC_NORMAL:

      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);

      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          case '0':     /* unused field */
          case 'i':     /* integer */
          case 'n':     /* note */
          case 'w':     /* wide integer */
          case 's':     /* pointer to string */
          case 'S':     /* optional pointer to string */

            if (rtx_needs_barrier (XEXP (x, i), flags, pred))

            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))

  return need_barrier;

/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}

/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    {
      need_barrier = 1;
      first_instruction = 0;
    }

  return need_barrier;
}

/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}

/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      if (dump)
		fprintf (dump, "Emitting stop before label %d\n",
			 INSN_UID (last_label));
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
	      insn = last_label;

	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed (insn);
	    }
	}
    }
}

/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units:  */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the mentioned above array.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx);
static void ia64_emit_insn_before (rtx, rtx);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}


/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}

/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */

static int
ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}

/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}

/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn, next, next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx insn;

  if (!sel_sched_p () && reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL_RTX;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}

/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */

static bool
is_load_p (rtx insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
     && get_attr_check_load (insn) == CHECK_LOAD_NO);
}

/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   (taking account for 3-cycle cache reference postponing for stores: Intel
   Itanium 2 Reference Manual for Software Development and Optimization).  */

static void
record_memory_reference (rtx insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;

    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;

    default:
      break;
    }
}
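
/* Illustrative sketch (not part of the port) of how the four-entry
   MEM_OPS_IN_GROUP window above behaves: loads are charged to the current
   cycle, while stores are charged three cycles later, so the count the
   scheduler should consult for cycle C is simply mem_ops_in_group[C % 4].
   The function name is a placeholder used only for illustration.  */
#if 0
static int
example_mem_ops_this_cycle (int cycle)
{
  /* The slot for CYCLE is cleared when the scheduler advances to it and
     then accumulates loads issued in CYCLE plus stores issued in
     CYCLE - 3.  */
  return mem_ops_in_group[cycle % 4];
}
#endif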

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms = 0;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}

/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx insn ATTRIBUTE_UNUSED,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (GET_CODE (insn) == CALL_INSN)
	init_insn_group_barriers ();
      stops_p[INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}

/* We are choosing insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
{
  gcc_assert (insn && INSN_P (insn));
  return ((!reload_completed
	   || !safe_group_barrier_needed (insn))
	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
	  && (!mflag_sched_mem_insns_hard_limit
	      || !is_load_p (insn)
	      || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
}

/* We are choosing insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static bool
ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
{
  gcc_assert (insn && INSN_P (insn));
  /* Size of ALAT is 32.  As far as we perform conservative data speculation,
     we keep ALAT half-empty.  */
  return (pending_data_specs < 16
	  || !(TODO_SPEC (insn) & BEGIN_DATA));
}

/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */

static int
scheduled_good_insn (rtx last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}

/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
		    int clock, int *sort_p)
{
  int setup_clocks_p = FALSE;

  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
	  else
	    *sort_p = 0;
	  return 1;
	}
      else if (reload_completed)
	setup_clocks_p = TRUE;

      if (last_scheduled_insn)
	{
	  if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  else if (reload_completed)
    setup_clocks_p = TRUE;

  return 0;
}

/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */

static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;

      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
      clocks_length = new_clocks_length;
    }
}

/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;

/* Allocates a scheduling context.  */

static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */

static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL_RTX;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */

static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
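
/* Illustrative sketch (not part of the port): the selective scheduler drives
   the context hooks above roughly like this when it switches between
   scheduling points.  The function and variable names are placeholders for
   whatever the caller actually uses.  */
#if 0
static void
example_switch_sched_point (void *saved_ctx_a, void *saved_ctx_b)
{
  /* Capture the current global state into context A (CLEAN_P false copies
     the globals), then continue scheduling from the state previously
     captured in context B.  */
  ia64_init_sched_context (saved_ctx_a, false);
  ia64_set_sched_context (saved_ctx_b);
}
#endif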

typedef rtx (* gen_func_t) (rtx, rtx);

/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */

static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else
    gen_ld = gen_ld_;

  return gen_ld[mode_no];
}

/* Constants that help mapping 'enum machine_mode' to int.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };

/* Return index of the MODE.  */
static int
ia64_mode_to_int (enum machine_mode mode)
{
  switch (mode)
    {
    case BImode: return 0; /* SPEC_MODE_FIRST  */
    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case HImode: return 2;
    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case DImode: return 4;
    case SFmode: return 5;
    case DFmode: return 6;
    case XFmode: return 7;
    case TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
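
/* Illustrative sketch (not part of the port) of how the mode index above
   combines with SPEC_GEN_EXTEND_OFFSET to index the gen_ld_* tables in
   get_spec_load_gen_function: a zero-extending load of, say, SImode lives
   SPEC_GEN_EXTEND_OFFSET entries past the plain SImode entry.  This mirrors
   the logic of get_mode_no_for_insn below; the function name is a
   placeholder.  */
#if 0
static int
example_spec_table_index (enum machine_mode mode, int extend_p)
{
  int mode_no = ia64_mode_to_int (mode);

  if (mode_no == SPEC_MODE_INVALID)
    return SPEC_MODE_INVALID;
  if (extend_p)
    {
      /* Only QImode..SImode loads have zero-extending variants.  */
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return SPEC_MODE_INVALID;
      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }
  return mode_no;
}
#endif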

/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
	  && (!sel_sched_p ()
	      || reload_completed))
	mask |= BEGIN_CONTROL;

      if (!sel_sched_p () && mflag_sched_in_control_spec)
	mask |= BE_IN_CONTROL;

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
	    spec_info->flags |= PREFER_NON_DATA_SPEC;

	  if (mask & CONTROL_SPEC)
	    {
	      if (mflag_sched_prefer_non_control_spec_insns)
		spec_info->flags |= PREFER_NON_CONTROL_SPEC;

	      if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
		spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
	    }

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}

/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */

static int
get_mode_no_for_insn (rtx insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;
	}

      {
	rtx mem_reg = XEXP (mem, 0);

	if (!REG_P (mem_reg))
	  return -1;
      }

      mode_rtx = mem;
    }
  else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	mode_rtx = mem;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}

/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */

static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

/* Implement skip_rtx_p hook.  */

static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}

/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */

static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}

/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */

ds_t
ia64_get_insn_spec_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

      return BEGIN_CONTROL;

      return BEGIN_DATA | BEGIN_CONTROL;

/* If INSN is a speculative load return a ds with the speculation types that
   are checked.
   Otherwise [if INSN is a normal instruction] return 0.  */

ds_t
ia64_get_insn_checked_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

      return BEGIN_DATA | BEGIN_CONTROL;

      return BEGIN_CONTROL;

      return BEGIN_DATA | BEGIN_CONTROL;

/* If GEN_P is true, calculate the index of needed speculation check and return
   speculative pattern for INSN with speculative mode TS, machine mode
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
   If GEN_P is false, just calculate the index of needed speculation check.  */

static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}

/* Return a function that will generate a check for speculation TS with mode
   MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */
      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	{
	  /* We might want to use ld.sa -> ld.c instead of
	     ld.s -> chk.s.  */
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}

/* Return nonzero, if INSN needs branchy recovery check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}

/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
   If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
   Otherwise, generate a simple check.  */
static rtx
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}

/* Return nonzero, if X is branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero, if SRC belongs to recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;
      int code;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}

/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
		   insn)  */
  int cost;     /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;

/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}

/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;

/* The function returns hash of BUNDLE_STATE.  */

static hashval_t
bundle_state_hash (const void *bundle_state)
{
  const struct bundle_state *const state
    = (const struct bundle_state *) bundle_state;
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

static int
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
{
  const struct bundle_state *const state1
    = (const struct bundle_state *) bundle_state_1;
  const struct bundle_state *const state2
    = (const struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}

/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || (((struct bundle_state *)
			*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && (((struct bundle_state *)
			    *entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || (((struct bundle_state *)
				*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && ((struct bundle_state *)
				   *entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
8142 initiate_bundle_state_table (void)
8144 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
8148 /* Finish work with the hash table. */
8151 finish_bundle_state_table (void)
8153 htab_delete (bundle_state_table
);

/* The following variable is an insn `nop' used to check bundle states
   with different number of inserted nops.  */

static rtx ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}

/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
   If it was successful, the function creates new bundle state and
   insert into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
		  && asm_noperands (PATTERN (insn)) < 0);

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);

      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);

/* The following function returns position in the two window bundle
   for given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}

/* The function returns code of a possible template for given position
   and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search
   because undocumented anomaly in McKinley derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power-considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;

/* True when INSN is important for bundling.  */
static bool
important_for_bundling_p (rtx insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns an insn important for insn bundling
   followed by INSN and before TAIL.  */

static rtx
get_next_important_insn (rtx insn, rtx tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL_RTX;
}

/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  if ((template0 == 4 || template0 == 5)
      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of following
	 bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (GET_CODE (insn) == INSN
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (GET_CODE (insn) == CALL_INSN)
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}

/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses NDFA (non-deterministic
   finite automata) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automata permits following
   all possible insn sequences very fast.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automata states.  The
   automata only says that we can issue an insn possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using DFA
   (deterministic finite automata).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describe for
   insn scheduling) before/after each insn being bundled.  We know the
   start of simulated processor cycle from insn scheduling (insn
   starting a new cycle has TImode).

   Simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying information about new
   cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by structure bundle_state (see above).  If we generate the same
   bundle state (key is automaton state after issuing the insns and
   nops for it), we reuse already generated one.  As consequence we
   reject some decisions which cannot improve the solution and
   reduce memory for the algorithm.

   When we reach the end of EBB (extended basic block), we choose the
   best sequence and then, moving back in EBB, insert templates for
   the best alternative.  The templates are taken from querying
   automaton state for each insn in chosen bundle states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
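
/* Illustrative sketch (not part of the port) of the forward dynamic
   programming step described above: for every state already reached at
   insn I-1, try issuing the next insn with 0, 1 or 2 leading nops;
   insert_bundle_state then keeps only the best state per (DFA state,
   insn number) key.  The function name is a placeholder.  */
#if 0
static void
example_forward_step (struct bundle_state *states_at_prev_insn, rtx insn,
		      int bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *s;

  for (s = states_at_prev_insn; s != NULL; s = s->next)
    {
      issue_nops_and_insn (s, 2, insn, bundle_end_p, only_bundle_end_p);
      issue_nops_and_insn (s, 1, insn, bundle_end_p, only_bundle_end_p);
      issue_nops_and_insn (s, 0, insn, bundle_end_p, only_bundle_end_p);
    }
}
#endif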
8512 bundling (FILE *dump
, int verbose
, rtx prev_head_insn
, rtx tail
)
8514 struct bundle_state
*curr_state
, *next_state
, *best_state
;
8515 rtx insn
, next_insn
;
8517 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
8518 int pos
= 0, max_pos
, template0
, template1
;
8521 enum attr_type type
;
8524 /* Count insns in the EBB. */
8525 for (insn
= NEXT_INSN (prev_head_insn
);
8526 insn
&& insn
!= tail
;
8527 insn
= NEXT_INSN (insn
))
8533 dfa_clean_insn_cache ();
8534 initiate_bundle_state_table ();
8535 index_to_bundle_states
= XNEWVEC (struct bundle_state
*, insn_num
+ 2);
8536 /* First (forward) pass -- generation of bundle states. */
8537 curr_state
= get_free_bundle_state ();
8538 curr_state
->insn
= NULL
;
8539 curr_state
->before_nops_num
= 0;
8540 curr_state
->after_nops_num
= 0;
8541 curr_state
->insn_num
= 0;
8542 curr_state
->cost
= 0;
8543 curr_state
->accumulated_insns_num
= 0;
8544 curr_state
->branch_deviation
= 0;
8545 curr_state
->middle_bundle_stops
= 0;
8546 curr_state
->next
= NULL
;
8547 curr_state
->originator
= NULL
;
8548 state_reset (curr_state
->dfa_state
);
8549 index_to_bundle_states
[0] = curr_state
;
8551 /* Shift cycle mark if it is put on insn which could be ignored. */
8552 for (insn
= NEXT_INSN (prev_head_insn
);
8554 insn
= NEXT_INSN (insn
))
8556 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
8557 || GET_CODE (PATTERN (insn
)) == USE
8558 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
8559 && GET_MODE (insn
) == TImode
)
8561 PUT_MODE (insn
, VOIDmode
);
8562 for (next_insn
= NEXT_INSN (insn
);
8564 next_insn
= NEXT_INSN (next_insn
))
8565 if (INSN_P (next_insn
)
8566 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
8567 && GET_CODE (PATTERN (next_insn
)) != USE
8568 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
8569 && INSN_CODE (next_insn
) != CODE_FOR_insn_group_barrier
)
8571 PUT_MODE (next_insn
, TImode
);
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (INSN_P (insn)
                  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
                  && GET_CODE (PATTERN (insn)) != USE
                  && GET_CODE (PATTERN (insn)) != CLOBBER);
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
           curr_state != NULL;
           curr_state = next_state)
        {
          pos = curr_state->accumulated_insns_num % 3;
          next_state = curr_state->next;
          /* We must fill up the current bundle in order to start a
             subsequent asm insn in a new bundle.  Asm insn is always
             placed in a separate bundle.  */
          only_bundle_end_p
            = (next_insn != NULL_RTX
               && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
               && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
          /* We may fill up the current bundle if it is the cycle end
             without a group barrier.  */
          bundle_end_p
            = (only_bundle_end_p || next_insn == NULL_RTX
               || (GET_MODE (next_insn) == TImode
                   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
          if (type == TYPE_F || type == TYPE_B || type == TYPE_L
              || type == TYPE_S)
            issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
                                 only_bundle_end_p);
          issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
                               only_bundle_end_p);
          issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
                               only_bundle_end_p);
        }
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
           curr_state != NULL;
           curr_state = curr_state->next)
        if (verbose >= 2 && dump)
          {
            /* This structure is taken from generated code of the
               pipeline hazard recognizer (see file insn-attrtab.c).
               Please don't forget to change the structure if a new
               automaton is added to the .md file.  */
            struct DFA_chip
            {
              unsigned short one_automaton_state;
              unsigned short oneb_automaton_state;
              unsigned short two_automaton_state;
              unsigned short twob_automaton_state;
            };

            fprintf
              (dump,
               "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
               curr_state->unique_num,
               (curr_state->originator == NULL
                ? -1 : curr_state->originator->unique_num),
               curr_state->cost,
               curr_state->before_nops_num, curr_state->after_nops_num,
               curr_state->accumulated_insns_num, curr_state->branch_deviation,
               curr_state->middle_bundle_stops,
               ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
               INSN_UID (insn));
          }
    }
  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are just looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       ones with minimal inserted nops, and finally ones with branch
       insns placed in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
        && (best_state == NULL || best_state->cost > curr_state->cost
            || (best_state->cost == curr_state->cost
                && (curr_state->accumulated_insns_num
                    < best_state->accumulated_insns_num
                    || (curr_state->accumulated_insns_num
                        == best_state->accumulated_insns_num
                        && (curr_state->branch_deviation
                            < best_state->branch_deviation
                            || (curr_state->branch_deviation
                                == best_state->branch_deviation
                                && curr_state->middle_bundle_stops
                                < best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
               || asm_noperands (PATTERN (insn)) >= 0);
      insn_num++;
      if (verbose >= 2 && dump)
        {
          struct DFA_chip
          {
            unsigned short one_automaton_state;
            unsigned short oneb_automaton_state;
            unsigned short two_automaton_state;
            unsigned short twob_automaton_state;
          };

          fprintf
            (dump,
             "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
             curr_state->unique_num,
             (curr_state->originator == NULL
              ? -1 : curr_state->originator->unique_num),
             curr_state->cost,
             curr_state->before_nops_num, curr_state->after_nops_num,
             curr_state->accumulated_insns_num, curr_state->branch_deviation,
             curr_state->middle_bundle_stops,
             ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
             INSN_UID (insn));
        }
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  A two bundle window means that
         the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
          /* The following (negative template number) means that the
             processor did one bundle rotation.  */
          || (max_pos == 3 && template0 < 0))
        {
          /* We are at the end of the window -- find template(s) for
             its bundle(s).  */
          pos = max_pos;
          if (max_pos == 3)
            template0 = get_template (curr_state->dfa_state, 3);
          else
            {
              template1 = get_template (curr_state->dfa_state, 3);
              template0 = get_template (curr_state->dfa_state, 6);
            }
        }
      if (max_pos > 3 && template1 < 0)
        /* It may happen when we have the stop inside a bundle.  */
        {
          gcc_assert (pos <= 3);
          template1 = get_template (curr_state->dfa_state, 3);
          pos += 3;
        }
      /* Emit nops after the current insn.  */
      for (i = 0; i < curr_state->after_nops_num; i++)
        {
          nop = gen_nop ();
          emit_insn_after (nop, insn);
          pos--;
          gcc_assert (pos >= 0);
          if (pos % 3 == 0)
            {
              /* We are at the start of a bundle: emit the template
                 (it should be defined).  */
              gcc_assert (template0 >= 0);
              ia64_add_bundle_selector_before (template0, nop);
              /* If we have a two bundle window, we make one bundle
                 rotation.  Otherwise template0 will be undefined
                 (negative value).  */
              template0 = template1;
              template1 = -1;
            }
        }
      /* Move the position backward in the window.  A group barrier has
         no slot.  An asm insn takes the whole bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        pos--;
      /* A long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
        pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
          && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
          && GET_CODE (PATTERN (insn)) != ASM_INPUT
          && asm_noperands (PATTERN (insn)) < 0)
        {
          /* The current insn is at the bundle start: emit the
             template.  */
          gcc_assert (template0 >= 0);
          ia64_add_bundle_selector_before (template0, insn);
          b = PREV_INSN (insn);
          insn = b;
          /* See comment above in the analogous place for emitting nops
             after the insn.  */
          template0 = template1;
          template1 = -1;
        }
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
        {
          nop = gen_nop ();
          ia64_emit_insn_before (nop, insn);
          nop = PREV_INSN (insn);
          insn = nop;
          pos--;
          gcc_assert (pos >= 0);
          if (pos % 3 == 0)
            {
              /* See comment above in the analogous place for emitting nops
                 after the insn.  */
              gcc_assert (template0 >= 0);
              ia64_add_bundle_selector_before (template0, insn);
              b = PREV_INSN (insn);
              insn = b;
              template0 = template1;
              template1 = -1;
            }
        }
    }
#ifdef ENABLE_CHECKING
  {
    /* Assert right calculation of middle_bundle_stops.  */
    int num = best_state->middle_bundle_stops;
    bool start_bundle = true, end_bundle = false;

    for (insn = NEXT_INSN (prev_head_insn);
         insn && insn != tail;
         insn = NEXT_INSN (insn))
      {
        if (!INSN_P (insn))
          continue;
        if (recog_memoized (insn) == CODE_FOR_bundle_selector)
          start_bundle = true;
        else
          {
            rtx next_insn;

            for (next_insn = NEXT_INSN (insn);
                 next_insn && next_insn != tail;
                 next_insn = NEXT_INSN (next_insn))
              if (INSN_P (next_insn)
                  && (ia64_safe_itanium_class (next_insn)
                      != ITANIUM_CLASS_IGNORE
                      || recog_memoized (next_insn)
                      == CODE_FOR_bundle_selector)
                  && GET_CODE (PATTERN (next_insn)) != USE
                  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
                break;

            end_bundle = next_insn == NULL_RTX
              || next_insn == tail
              || (INSN_P (next_insn)
                  && recog_memoized (next_insn)
                  == CODE_FOR_bundle_selector);
            if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
                && !start_bundle && !end_bundle
                && next_insn
                && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
                && asm_noperands (PATTERN (next_insn)) < 0)
              num--;

            start_bundle = false;
          }
      }

    gcc_assert (num == 0);
  }
#endif

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
}
/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;
  if (reload_completed)
    {
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
                current_sched_info->next_tail);
      if (sched_verbose && dump)
        fprintf (dump, "// finishing %d-%d\n",
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
    }
}
/* The following function inserts stop bits in the scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;
  rtx prev_insn = NULL_RTX;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        {
          rtx last = prev_active_insn (insn);

          if (! last)
            continue;
          if (GET_CODE (last) == JUMP_INSN
              && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
            last = prev_active_insn (last);
          if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

          init_insn_group_barriers ();
          seen_good_insn = 0;
          need_barrier_p = 0;
          prev_insn = NULL_RTX;
        }
      else if (NONDEBUG_INSN_P (insn))
        {
          if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
            {
              init_insn_group_barriers ();
              seen_good_insn = 0;
              need_barrier_p = 0;
              prev_insn = NULL_RTX;
            }
          else if (need_barrier_p || group_barrier_needed (insn)
                   || (mflag_sched_stop_bits_after_every_cycle
                       && GET_MODE (insn) == TImode
                       && seen_good_insn))
            {
              if (TARGET_EARLY_STOP_BITS)
                {
                  rtx last;

                  for (last = insn;
                       last != current_sched_info->prev_head;
                       last = PREV_INSN (last))
                    if (INSN_P (last) && GET_MODE (last) == TImode
                        && stops_p [INSN_UID (last)])
                      break;
                  if (last == current_sched_info->prev_head)
                    last = insn;
                  last = prev_active_insn (last);
                  if (last
                      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
                    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
                                     last);
                  init_insn_group_barriers ();
                  for (last = NEXT_INSN (last);
                       last != insn;
                       last = NEXT_INSN (last))
                    if (INSN_P (last))
                      {
                        group_barrier_needed (last);
                        if (recog_memoized (last) >= 0
                            && important_for_bundling_p (last))
                          seen_good_insn = 1;
                      }
                }
              else
                {
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                    insn);
                  init_insn_group_barriers ();
                  seen_good_insn = 0;
                }
              group_barrier_needed (insn);
              if (recog_memoized (insn) >= 0
                  && important_for_bundling_p (insn))
                seen_good_insn = 1;
              prev_insn = NULL_RTX;
            }
          else if (recog_memoized (insn) >= 0
                   && important_for_bundling_p (insn))
            {
              prev_insn = insn;
              seen_good_insn = 1;
            }
          need_barrier_p = (GET_CODE (insn) == CALL_INSN
                            || GET_CODE (PATTERN (insn)) == ASM_INPUT
                            || asm_noperands (PATTERN (insn)) >= 0);
        }
    }
}
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
                  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}
/* The following function returns TRUE if INSN produces the address for a
   load/store insn.  We will place such insns into the M slot because it
   decreases their latency time.  */

int
ia64_produce_address_p (rtx insn)
{
  return insn->call;
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */
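/* Editor's note (not from the original sources): the pseudo-op emitted
   for a mutex pair looks roughly like

       .pred.rel "mutex", p6, p7

   telling the assembler that the two predicates are never true at the
   same time, so apparent dependency violations between instructions
   predicated on them are not real.  The directive itself is produced by
   the pred_rel_mutex pattern expanded via gen_pred_rel_mutex below.  */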
static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
        continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
        head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
         grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (df_get_live_in (bb), r))
          {
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == BB_END (bb))
              BB_END (bb) = n;
            head = n;
          }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = BB_HEAD (bb);

      while (1)
        {
          if (GET_CODE (insn) == CALL_INSN
              && GET_CODE (PATTERN (insn)) == COND_EXEC
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
            {
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
              if (BB_HEAD (bb) == insn)
                BB_HEAD (bb) = b;
              if (BB_END (bb) == insn)
                BB_END (bb) = a;
            }

          if (insn == BB_END (bb))
            break;
          insn = NEXT_INSN (insn);
        }
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && ia64_flag_schedule_insns2
      && dbg_cnt (ia64_sched2))
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
        {
          pos_1 = get_cpu_unit_code ("2_1");
          pos_2 = get_cpu_unit_code ("2_2");
          pos_3 = get_cpu_unit_code ("2_3");
          pos_4 = get_cpu_unit_code ("2_4");
          pos_5 = get_cpu_unit_code ("2_5");
          pos_6 = get_cpu_unit_code ("2_6");
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
        }
      else
        {
          pos_1 = get_cpu_unit_code ("1_1");
          pos_2 = get_cpu_unit_code ("1_2");
          pos_3 = get_cpu_unit_code ("1_3");
          pos_4 = get_cpu_unit_code ("1_4");
          pos_5 = get_cpu_unit_code ("1_5");
          pos_6 = get_cpu_unit_code ("1_6");
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
        }

      if (flag_selective_scheduling2
          && !maybe_skip_selective_scheduling ())
        run_selective_scheduling ();
      else
        schedule_ebbs ();

      /* Redo the alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
         evil glat.  */
      finish_bundle_states ();
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      if (insn)
        {
          /* Skip over insns that expand to nothing.  */
          while (GET_CODE (insn) == INSN
                 && get_attr_empty (insn) == EMPTY_YES)
            {
              if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
                  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
                saw_stop = 1;
              insn = prev_active_insn (insn);
            }
          if (GET_CODE (insn) == CALL_INSN)
            {
              if (! saw_stop)
                emit_insn (gen_insn_group_barrier (GEN_INT (3)));
              emit_insn (gen_break_f ());
              emit_insn (gen_insn_group_barrier (GEN_INT (3)));
            }
        }
    }

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
         value to "gp".  After returning from such a call, we need to make
         sure the function restores the original gp-value, even if the
         function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
        || regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));

      if (strcmp (section, ".sdata") == 0
          || strncmp (section, ".sdata.", 7) == 0
          || strncmp (section, ".gnu.linkonce.s.", 16) == 0
          || strcmp (section, ".sbss") == 0
          || strncmp (section, ".sbss.", 6) == 0
          || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last block of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
/* Emit a debugging label after a call-frame-related insn.  We'd
   rather output the label right away, but we'd have to output it
   after, not before, the instruction, and the instruction has not
   been output yet.  So we emit the label after the insn, delete it to
   avoid introducing basic blocks, and mark it as preserved, such that
   it is still output, given that it is referenced in debug info.  */

static const char *
ia64_emit_deleted_label_after_insn (rtx insn)
{
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
  rtx lb = gen_label_rtx ();
  rtx label_insn = emit_label_after (lb, insn);

  LABEL_PRESERVE_P (lb) = 1;

  delete_insn (label_insn);

  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));

  return xstrdup (label);
}
/* Define the CFA after INSN with the steady-state definition.  */

static void
ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
{
  rtx fp = frame_pointer_needed
    ? hard_frame_pointer_rtx
    : stack_pointer_rtx;
  const char *label = ia64_emit_deleted_label_after_insn (insn);

  if (!frame)
    return;

  dwarf2out_def_cfa
    (label, REGNO (fp),
     ia64_initial_elimination_offset
     (REGNO (arg_pointer_rtx), REGNO (fp))
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
}
/* The generic dwarf2 frame debug info generator does not define a
   separate region for the very end of the epilogue, so refrain from
   doing so in the IA64-specific code as well.  */

#define IA64_CHANGE_CFA_IN_EPILOGUE 0

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
        fprintf (asm_out_file, "\t.label_state %d\n",
                 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
                       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
}
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */
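/* Editor's note (not from the original sources): as a concrete example,
   a frame-related prologue insn of the form

       (set (reg sp) (plus (reg sp) (const_int -160)))

   is announced to the unwinder below as ".fframe 160", whereas the
   "sp = fp" copy in the epilogue ends up as a ".restore sp" directive
   via process_epilogue.  */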
static void
process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
         be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
        {
          if (unwind)
            fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
                     ia64_dbx_register_number (dest_regno));
        }
      else
        {
          /* This must be an alloc before a sibcall.  We must drop the
             old frame info.  The easiest way to drop the old frame
             info is to ensure we had a ".restore sp" directive
             followed by a new prologue.  If the procedure doesn't
             have a memory-stack frame, we'll issue a dummy ".restore
             sp" now.  */
          if (current_frame_info.total_size == 0 && !frame_pointer_needed)
            /* If we haven't done process_epilogue () yet, do it now.  */
            process_epilogue (asm_out_file, insn, unwind, frame);
          if (unwind)
            fprintf (asm_out_file, "\t.prologue\n");
        }
      return;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
        {
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);

          gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

          if (INTVAL (op1) < 0)
            {
              gcc_assert (!frame_pointer_needed);
              if (unwind)
                fprintf (asm_out_file,
                         "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
                         -INTVAL (op1));
              ia64_dwarf2out_def_steady_cfa (insn, frame);
            }
          else
            process_epilogue (asm_out_file, insn, unwind, frame);
        }
      else
        {
          gcc_assert (GET_CODE (src) == REG
                      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
          process_epilogue (asm_out_file, insn, unwind, frame);
        }

      return;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
        {
        case BR_REG (0):
          /* Saving return address pointer.  */
          gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
          if (unwind)
            fprintf (asm_out_file, "\t.save rp, r%d\n",
                     ia64_dbx_register_number (dest_regno));
          return;

        case PR_REG (0):
          gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
          if (unwind)
            fprintf (asm_out_file, "\t.save pr, r%d\n",
                     ia64_dbx_register_number (dest_regno));
          return;

        case AR_UNAT_REGNUM:
          gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
          if (unwind)
            fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                     ia64_dbx_register_number (dest_regno));
          return;

        case AR_LC_REGNUM:
          gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
          if (unwind)
            fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                     ia64_dbx_register_number (dest_regno));
          return;

        case STACK_POINTER_REGNUM:
          gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
                      && frame_pointer_needed);
          if (unwind)
            fprintf (asm_out_file, "\t.vframe r%d\n",
                     ia64_dbx_register_number (dest_regno));
          ia64_dwarf2out_def_steady_cfa (insn, frame);
          return;

        default:
          /* Everything else should indicate being stored to memory.  */
          gcc_unreachable ();
        }
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
        {
          base = XEXP (dest, 0);
          off = 0;
        }
      else
        {
          gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
                      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
        }

      if (base == hard_frame_pointer_rtx)
        {
          saveop = ".savepsp";
          off = - off;
        }
      else
        {
          gcc_assert (base == stack_pointer_rtx);
          saveop = ".savesp";
        }

      src_regno = REGNO (src);
      switch (src_regno)
        {
        case BR_REG (0):
          gcc_assert (!current_frame_info.r[reg_save_b0]);
          if (unwind)
            fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
          return;

        case PR_REG (0):
          gcc_assert (!current_frame_info.r[reg_save_pr]);
          if (unwind)
            fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
          return;

        case AR_LC_REGNUM:
          gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
          if (unwind)
            fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
          return;

        case AR_PFS_REGNUM:
          gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
          if (unwind)
            fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
          return;

        case AR_UNAT_REGNUM:
          gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
          if (unwind)
            fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
          return;

        case GR_REG (4): case GR_REG (5): case GR_REG (6): case GR_REG (7):
          if (unwind)
            fprintf (asm_out_file, "\t.save.g 0x%x\n",
                     1 << (src_regno - GR_REG (4)));
          return;

        case BR_REG (1): case BR_REG (2): case BR_REG (3):
        case BR_REG (4): case BR_REG (5):
          if (unwind)
            fprintf (asm_out_file, "\t.save.b 0x%x\n",
                     1 << (src_regno - BR_REG (1)));
          return;

        case FR_REG (2): case FR_REG (3): case FR_REG (4): case FR_REG (5):
          if (unwind)
            fprintf (asm_out_file, "\t.save.f 0x%x\n",
                     1 << (src_regno - FR_REG (2)));
          return;

        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          if (unwind)
            fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                     1 << (src_regno - FR_REG (12)));
          return;

        default:
          gcc_unreachable ();
        }
    }
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
{
  bool unwind = (flag_unwind_tables
                 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
  bool frame = dwarf2out_do_frame ();

  if (unwind || frame)
    {
      rtx pat;

      if (NOTE_INSN_BASIC_BLOCK_P (insn))
        {
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
            {
              if (unwind)
                {
                  fprintf (asm_out_file, "\t.body\n");
                  fprintf (asm_out_file, "\t.copy_state %d\n",
                           cfun->machine->state_num);
                }
              if (IA64_CHANGE_CFA_IN_EPILOGUE)
                ia64_dwarf2out_def_steady_cfa (insn, frame);
              need_copy_state = false;
            }
        }

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
        return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
        pat = XEXP (pat, 0);
      else
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
        {
        case SET:
          process_set (asm_out_file, pat, insn, unwind, frame);
          break;

        case PARALLEL:
          {
            int par_index;
            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
              {
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x, insn, unwind, frame);
              }
            break;
          }

        default:
          gcc_unreachable ();
        }
    }
}
enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ
};

static void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type, void_list_node);
      add_builtin_function ("__builtin_infq", ftype,
                            IA64_BUILTIN_INFQ, BUILT_IN_MD,
                            NULL, NULL_TREE);

      add_builtin_function ("__builtin_huge_valq", ftype,
                            IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                            NULL, NULL_TREE);

      ftype = build_function_type_list (float128_type,
                                        float128_type,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;

      ftype = build_function_type_list (float128_type,
                                        float128_type,
                                        float128_type,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float128");

  /* Fwrite on VMS is non-standard.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
    }

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
                        NULL, NULL_TREE)

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

#undef def_builtin

  if (TARGET_HPUX)
    {
      if (built_in_decls [BUILT_IN_FINITE])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
                                 "_Isfinite");
      if (built_in_decls [BUILT_IN_FINITEF])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
                                 "_Isfinitef");
      if (built_in_decls [BUILT_IN_FINITEL])
        set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
                                 "_Isfinitef128");
    }
}
static rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
/* For HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
         visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
                             && maybe_assemble_visibility (decl));

#ifdef DO_CRTL_NAMES
      DO_CRTL_NAMES;
#endif

      /* GNU as does not need anything here, but the HP linker does
         need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
          && TREE_CODE (decl) == FUNCTION_DECL)
        (*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
        (*targetm.asm_out.globalize_label) (file, name);
    }
}
/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

static void
ia64_soft_fp_init_libfuncs (void)
{
}
static bool
ia64_vms_valid_pointer_mode (enum machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}
/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */
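/* Editor's note (not from the original sources): keeping small constants
   in .sdata lets them be addressed gp-relative with a short "addl"
   sequence, avoiding the extra load through the linkage table that
   ordinary data needs (compare the ??? comment above
   ia64_in_small_data_p).  */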
static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

#if TARGET_ABI_OPEN_VMS
  if (decl && DECL_ATTRIBUTES (decl)
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
    flags |= SECTION_VMS_OVERLAY;
#endif

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */
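/* Editor's sketch (not from the original sources): the pointer
   adjustment the generated thunk performs, written as plain C for
   reference; "this_ptr" is a hypothetical name.

       this_ptr += delta;
       if (vcall_offset != 0)
         this_ptr += *(long *) (*(char **) this_ptr + vcall_offset);
       tail-call FUNCTION with the adjusted this_ptr;
*/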
static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this_rtx, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
        {
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
          if (satisfies_constraint_I (vcall_offset_rtx))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
        {
          if (!satisfies_constraint_J (vcall_offset_rtx))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: case HImode: case SImode: case DImode: case TImode:
      return true;

    case SFmode: case DFmode: case XFmode: case RFmode: case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V8QImode: case V4HImode: case V2SImode: case V2SFmode:
      return true;

    default:
      return false;
    }
}
/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode, 3,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode,
                     label, Pmode);
}
10540 static const char *
10541 ia64_mangle_type (const_tree type
)
10543 type
= TYPE_MAIN_VARIANT (type
);
10545 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
10546 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
10549 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10551 if (!TARGET_HPUX
&& TYPE_MODE (type
) == TFmode
)
10553 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10554 an extended mangling. Elsewhere, "e" is available since long
10555 double is 80 bits. */
10556 if (TYPE_MODE (type
) == XFmode
)
10557 return TARGET_HPUX
? "u9__float80" : "e";
10558 if (TYPE_MODE (type
) == RFmode
)
10559 return "u7__fpreg";
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}
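/* Editor's note (not from the original sources): with the hook above,
   code such as

       __fpreg f = 1.0;    // rejected: invalid conversion to __fpreg

   is diagnosed, while unary "+" on an __fpreg object and taking its
   address remain valid (see ia64_invalid_unary_op below).  */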
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
/* Implement overriding of the optimization options.  */
void
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
                           int size ATTRIBUTE_UNUSED)
{
  /* Let the scheduler form additional regions.  */
  set_param_value ("max-sched-extend-regions-iters", 2);

  /* Set the default values for cache-related parameters.  */
  set_param_value ("simultaneous-prefetches", 6);
  set_param_value ("l1-cache-line-size", 32);

  set_param_value ("sched-mem-true-dep-cost", 4);
}
/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                  tree name ATTRIBUTE_UNUSED,
                                  tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
10643 static enum machine_mode
10644 ia64_c_mode_for_suffix (char suffix
)
static enum machine_mode
ia64_promote_function_mode (const_tree type,
                            enum machine_mode mode,
                            int *punsignedp,
                            const_tree funtype,
                            int for_return)
{
  /* Special processing required for OpenVMS ...  */

  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode (type, mode, punsignedp, funtype,
                                          for_return);

  /* HP OpenVMS Calling Standard dated June, 2004, that describes
     HP OpenVMS I64 Version 8.2EFT,
     chapter 4 "OpenVMS I64 Conventions"
     section 4.7 "Procedure Linkage"
     subsection 4.7.5.2, "Normal Register Parameters"

     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
     values passed in registers are zero-filled; signed integral values as
     well as unsigned 32-bit integral values are sign-extended to 64 bits.
     For all other types passed in the general registers, unused bits are
     undefined."  */

  if (!AGGREGATE_TYPE_P (type)
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    {
      if (mode == SImode)
        *punsignedp = 0;
      return DImode;
    }
  else
    return promote_mode (type, mode, punsignedp);
}
static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

#include "gt-ia64.h"