gcc/config/ia64/ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3 2009
4 Free Software Foundation, Inc.
5 Contributed by James E. Wilson <wilson@cygnus.com> and
6 David Mosberger <davidm@hpl.hp.com>.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "real.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "except.h"
42 #include "function.h"
43 #include "ggc.h"
44 #include "basic-block.h"
45 #include "libfuncs.h"
46 #include "toplev.h"
47 #include "sched-int.h"
48 #include "timevar.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "tm_p.h"
52 #include "hashtab.h"
53 #include "langhooks.h"
54 #include "cfglayout.h"
55 #include "gimple.h"
56 #include "intl.h"
57 #include "df.h"
58 #include "debug.h"
59 #include "params.h"
60 #include "dbgcnt.h"
61 #include "tm-constrs.h"
62 #include "sel-sched.h"
64 /* This is used for communication between ASM_OUTPUT_LABEL and
65 ASM_OUTPUT_LABELREF. */
66 int ia64_asm_output_label = 0;
68 /* Register names for ia64_expand_prologue. */
69 static const char * const ia64_reg_numbers[96] =
70 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79 "r104","r105","r106","r107","r108","r109","r110","r111",
80 "r112","r113","r114","r115","r116","r117","r118","r119",
81 "r120","r121","r122","r123","r124","r125","r126","r127"};
83 /* ??? These strings could be shared with REGISTER_NAMES. */
84 static const char * const ia64_input_reg_names[8] =
85 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
87 /* ??? These strings could be shared with REGISTER_NAMES. */
88 static const char * const ia64_local_reg_names[80] =
89 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
100 /* ??? These strings could be shared with REGISTER_NAMES. */
101 static const char * const ia64_output_reg_names[8] =
102 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
 104 /* Which CPU we are scheduling for.  */
105 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
107 /* Determines whether we run our final scheduling pass or not. We always
108 avoid the normal second scheduling pass. */
109 static int ia64_flag_schedule_insns2;
111 /* Determines whether we run variable tracking in machine dependent
112 reorganization. */
113 static int ia64_flag_var_tracking;
115 /* Variables which are this size or smaller are put in the sdata/sbss
116 sections. */
118 unsigned int ia64_section_threshold;
120 /* The following variable is used by the DFA insn scheduler. The value is
121 TRUE if we do insn bundling instead of insn scheduling. */
122 int bundling_p = 0;
124 enum ia64_frame_regs
126 reg_fp,
127 reg_save_b0,
128 reg_save_pr,
129 reg_save_ar_pfs,
130 reg_save_ar_unat,
131 reg_save_ar_lc,
132 reg_save_gp,
133 number_of_ia64_frame_regs
136 /* Structure to be filled in by ia64_compute_frame_size with register
137 save masks and offsets for the current function. */
139 struct ia64_frame_info
141 HOST_WIDE_INT total_size; /* size of the stack frame, not including
142 the caller's scratch area. */
143 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
144 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
145 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
146 HARD_REG_SET mask; /* mask of saved registers. */
147 unsigned int gr_used_mask; /* mask of registers in use as gr spill
148 registers or long-term scratches. */
149 int n_spilled; /* number of spilled registers. */
150 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
151 int n_input_regs; /* number of input registers used. */
152 int n_local_regs; /* number of local registers used. */
153 int n_output_regs; /* number of output registers used. */
154 int n_rotate_regs; /* number of rotating registers used. */
156 char need_regstk; /* true if a .regstk directive needed. */
157 char initialized; /* true if the data is finalized. */
160 /* Current frame information calculated by ia64_compute_frame_size. */
161 static struct ia64_frame_info current_frame_info;
162 /* The actual registers that are emitted. */
163 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
165 static int ia64_first_cycle_multipass_dfa_lookahead (void);
166 static void ia64_dependencies_evaluation_hook (rtx, rtx);
167 static void ia64_init_dfa_pre_cycle_insn (void);
168 static rtx ia64_dfa_pre_cycle_insn (void);
169 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
170 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
171 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
172 static void ia64_h_i_d_extended (void);
173 static void * ia64_alloc_sched_context (void);
174 static void ia64_init_sched_context (void *, bool);
175 static void ia64_set_sched_context (void *);
176 static void ia64_clear_sched_context (void *);
177 static void ia64_free_sched_context (void *);
178 static int ia64_mode_to_int (enum machine_mode);
179 static void ia64_set_sched_flags (spec_info_t);
180 static ds_t ia64_get_insn_spec_ds (rtx);
181 static ds_t ia64_get_insn_checked_ds (rtx);
182 static bool ia64_skip_rtx_p (const_rtx);
183 static int ia64_speculate_insn (rtx, ds_t, rtx *);
184 static bool ia64_needs_block_p (int);
185 static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
186 static int ia64_spec_check_p (rtx);
187 static int ia64_spec_check_src_p (rtx);
188 static rtx gen_tls_get_addr (void);
189 static rtx gen_thread_pointer (void);
190 static int find_gr_spill (enum ia64_frame_regs, int);
191 static int next_scratch_gr_reg (void);
192 static void mark_reg_gr_used_mask (rtx, void *);
193 static void ia64_compute_frame_size (HOST_WIDE_INT);
194 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
195 static void finish_spill_pointers (void);
196 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
197 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
198 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
199 static rtx gen_movdi_x (rtx, rtx, rtx);
200 static rtx gen_fr_spill_x (rtx, rtx, rtx);
201 static rtx gen_fr_restore_x (rtx, rtx, rtx);
203 static bool ia64_can_eliminate (const int, const int);
204 static enum machine_mode hfa_element_mode (const_tree, bool);
205 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
206 tree, int *, int);
207 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
208 tree, bool);
209 static bool ia64_function_ok_for_sibcall (tree, tree);
210 static bool ia64_return_in_memory (const_tree, const_tree);
211 static bool ia64_rtx_costs (rtx, int, int, int *, bool);
212 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
213 static void fix_range (const char *);
214 static bool ia64_handle_option (size_t, const char *, int);
215 static struct machine_function * ia64_init_machine_status (void);
216 static void emit_insn_group_barriers (FILE *);
217 static void emit_all_insn_group_barriers (FILE *);
218 static void final_emit_insn_group_barriers (FILE *);
219 static void emit_predicate_relation_info (void);
220 static void ia64_reorg (void);
221 static bool ia64_in_small_data_p (const_tree);
222 static void process_epilogue (FILE *, rtx, bool, bool);
223 static int process_set (FILE *, rtx, rtx, bool, bool);
225 static bool ia64_assemble_integer (rtx, unsigned int, int);
226 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
227 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void ia64_output_function_end_prologue (FILE *);
230 static int ia64_issue_rate (void);
231 static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
232 static void ia64_sched_init (FILE *, int, int);
233 static void ia64_sched_init_global (FILE *, int, int);
234 static void ia64_sched_finish_global (FILE *, int);
235 static void ia64_sched_finish (FILE *, int);
236 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
237 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
238 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
239 static int ia64_variable_issue (FILE *, int, rtx, int);
241 static struct bundle_state *get_free_bundle_state (void);
242 static void free_bundle_state (struct bundle_state *);
243 static void initiate_bundle_states (void);
244 static void finish_bundle_states (void);
245 static unsigned bundle_state_hash (const void *);
246 static int bundle_state_eq_p (const void *, const void *);
247 static int insert_bundle_state (struct bundle_state *);
248 static void initiate_bundle_state_table (void);
249 static void finish_bundle_state_table (void);
250 static int try_issue_nops (struct bundle_state *, int);
251 static int try_issue_insn (struct bundle_state *, rtx);
252 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
253 static int get_max_pos (state_t);
254 static int get_template (state_t, int);
256 static rtx get_next_important_insn (rtx, rtx);
257 static bool important_for_bundling_p (rtx);
258 static void bundling (FILE *, int, rtx, rtx);
260 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
261 HOST_WIDE_INT, tree);
262 static void ia64_file_start (void);
263 static void ia64_globalize_decl_name (FILE *, tree);
265 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
266 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
267 static section *ia64_select_rtx_section (enum machine_mode, rtx,
268 unsigned HOST_WIDE_INT);
269 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
270 ATTRIBUTE_UNUSED;
271 static unsigned int ia64_section_type_flags (tree, const char *, int);
272 static void ia64_init_libfuncs (void)
273 ATTRIBUTE_UNUSED;
274 static void ia64_hpux_init_libfuncs (void)
275 ATTRIBUTE_UNUSED;
276 static void ia64_sysv4_init_libfuncs (void)
277 ATTRIBUTE_UNUSED;
278 static void ia64_vms_init_libfuncs (void)
279 ATTRIBUTE_UNUSED;
280 static void ia64_soft_fp_init_libfuncs (void)
281 ATTRIBUTE_UNUSED;
282 static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
283 ATTRIBUTE_UNUSED;
284 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
285 ATTRIBUTE_UNUSED;
287 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
288 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
289 static void ia64_encode_section_info (tree, rtx, int);
290 static rtx ia64_struct_value_rtx (tree, int);
291 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
292 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
293 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
294 static bool ia64_cannot_force_const_mem (rtx);
295 static const char *ia64_mangle_type (const_tree);
296 static const char *ia64_invalid_conversion (const_tree, const_tree);
297 static const char *ia64_invalid_unary_op (int, const_tree);
298 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
299 static enum machine_mode ia64_c_mode_for_suffix (char);
300 static enum machine_mode ia64_promote_function_mode (const_tree,
301 enum machine_mode,
302 int *,
303 const_tree,
304 int);
305 static void ia64_trampoline_init (rtx, tree, rtx);
306 static void ia64_override_options_after_change (void);
308 /* Table of valid machine attributes. */
309 static const struct attribute_spec ia64_attribute_table[] =
311 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
312 { "syscall_linkage", 0, 0, false, true, true, NULL },
313 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
314 #if TARGET_ABI_OPEN_VMS
315 { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute},
316 #endif
317 { "version_id", 1, 1, true, false, false,
318 ia64_handle_version_id_attribute },
319 { NULL, 0, 0, false, false, false, NULL }
322 /* Initialize the GCC target structure. */
323 #undef TARGET_ATTRIBUTE_TABLE
324 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
326 #undef TARGET_INIT_BUILTINS
327 #define TARGET_INIT_BUILTINS ia64_init_builtins
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
332 #undef TARGET_ASM_BYTE_OP
333 #define TARGET_ASM_BYTE_OP "\tdata1\t"
334 #undef TARGET_ASM_ALIGNED_HI_OP
335 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
338 #undef TARGET_ASM_ALIGNED_DI_OP
339 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
340 #undef TARGET_ASM_UNALIGNED_HI_OP
341 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
342 #undef TARGET_ASM_UNALIGNED_SI_OP
343 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
344 #undef TARGET_ASM_UNALIGNED_DI_OP
345 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
346 #undef TARGET_ASM_INTEGER
347 #define TARGET_ASM_INTEGER ia64_assemble_integer
349 #undef TARGET_ASM_FUNCTION_PROLOGUE
350 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
351 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
352 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
353 #undef TARGET_ASM_FUNCTION_EPILOGUE
354 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
356 #undef TARGET_IN_SMALL_DATA_P
357 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
359 #undef TARGET_SCHED_ADJUST_COST_2
360 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
365 #undef TARGET_SCHED_INIT
366 #define TARGET_SCHED_INIT ia64_sched_init
367 #undef TARGET_SCHED_FINISH
368 #define TARGET_SCHED_FINISH ia64_sched_finish
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
371 #undef TARGET_SCHED_FINISH_GLOBAL
372 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER ia64_sched_reorder
375 #undef TARGET_SCHED_REORDER2
376 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
378 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
379 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
384 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
385 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
386 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
387 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
389 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
390 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
391 ia64_first_cycle_multipass_dfa_lookahead_guard
393 #undef TARGET_SCHED_DFA_NEW_CYCLE
394 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
396 #undef TARGET_SCHED_H_I_D_EXTENDED
397 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
399 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
400 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
402 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
403 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
405 #undef TARGET_SCHED_SET_SCHED_CONTEXT
406 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
408 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
409 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
411 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
412 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
414 #undef TARGET_SCHED_SET_SCHED_FLAGS
415 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
417 #undef TARGET_SCHED_GET_INSN_SPEC_DS
418 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
420 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
421 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
423 #undef TARGET_SCHED_SPECULATE_INSN
424 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
426 #undef TARGET_SCHED_NEEDS_BLOCK_P
427 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
429 #undef TARGET_SCHED_GEN_SPEC_CHECK
430 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
432 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
433 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
434 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
436 #undef TARGET_SCHED_SKIP_RTX_P
437 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
439 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
440 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
444 #undef TARGET_ASM_OUTPUT_MI_THUNK
445 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
446 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
447 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
449 #undef TARGET_ASM_FILE_START
450 #define TARGET_ASM_FILE_START ia64_file_start
452 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
453 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
455 #undef TARGET_RTX_COSTS
456 #define TARGET_RTX_COSTS ia64_rtx_costs
457 #undef TARGET_ADDRESS_COST
458 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
460 #undef TARGET_UNSPEC_MAY_TRAP_P
461 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
466 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
469 #undef TARGET_SECTION_TYPE_FLAGS
470 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
472 #ifdef HAVE_AS_TLS
473 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
474 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
475 #endif
477 #undef TARGET_PROMOTE_FUNCTION_MODE
478 #define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
480 /* ??? Investigate. */
481 #if 0
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
484 #endif
486 #undef TARGET_STRUCT_VALUE_RTX
487 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
488 #undef TARGET_RETURN_IN_MEMORY
489 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
490 #undef TARGET_SETUP_INCOMING_VARARGS
491 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
492 #undef TARGET_STRICT_ARGUMENT_NAMING
493 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
497 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
498 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
500 #undef TARGET_UNWIND_EMIT
501 #define TARGET_UNWIND_EMIT process_for_unwind_directive
503 #undef TARGET_SCALAR_MODE_SUPPORTED_P
504 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
508 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
509 in an order different from the specified program order. */
510 #undef TARGET_RELAXED_ORDERING
511 #define TARGET_RELAXED_ORDERING true
513 #undef TARGET_DEFAULT_TARGET_FLAGS
514 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
515 #undef TARGET_HANDLE_OPTION
516 #define TARGET_HANDLE_OPTION ia64_handle_option
518 #undef TARGET_CANNOT_FORCE_CONST_MEM
519 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
521 #undef TARGET_MANGLE_TYPE
522 #define TARGET_MANGLE_TYPE ia64_mangle_type
524 #undef TARGET_INVALID_CONVERSION
525 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
526 #undef TARGET_INVALID_UNARY_OP
527 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
528 #undef TARGET_INVALID_BINARY_OP
529 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
531 #undef TARGET_C_MODE_FOR_SUFFIX
532 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
534 #undef TARGET_CAN_ELIMINATE
535 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
537 #undef TARGET_TRAMPOLINE_INIT
538 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
540 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
541 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
543 struct gcc_target targetm = TARGET_INITIALIZER;
545 typedef enum
547 ADDR_AREA_NORMAL, /* normal address area */
548 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
550 ia64_addr_area;
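/* For illustration (COUNTER is a hypothetical variable, not part of this
   file): user code selects the small address area with the "model"
   attribute, e.g.

     static int counter __attribute__ ((model ("small")));

   The handler below accepts either "small" or "__small__" and maps the
   declaration to ADDR_AREA_SMALL, so its address fits the addl range
   described above.  */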
552 static GTY(()) tree small_ident1;
553 static GTY(()) tree small_ident2;
555 static void
556 init_idents (void)
558 if (small_ident1 == 0)
560 small_ident1 = get_identifier ("small");
561 small_ident2 = get_identifier ("__small__");
565 /* Retrieve the address area that has been chosen for the given decl. */
567 static ia64_addr_area
568 ia64_get_addr_area (tree decl)
570 tree model_attr;
572 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
573 if (model_attr)
575 tree id;
577 init_idents ();
578 id = TREE_VALUE (TREE_VALUE (model_attr));
579 if (id == small_ident1 || id == small_ident2)
580 return ADDR_AREA_SMALL;
582 return ADDR_AREA_NORMAL;
585 static tree
586 ia64_handle_model_attribute (tree *node, tree name, tree args,
587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
589 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
590 ia64_addr_area area;
591 tree arg, decl = *node;
593 init_idents ();
594 arg = TREE_VALUE (args);
595 if (arg == small_ident1 || arg == small_ident2)
597 addr_area = ADDR_AREA_SMALL;
599 else
601 warning (OPT_Wattributes, "invalid argument of %qE attribute",
602 name);
603 *no_add_attrs = true;
606 switch (TREE_CODE (decl))
608 case VAR_DECL:
609 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
610 == FUNCTION_DECL)
611 && !TREE_STATIC (decl))
613 error_at (DECL_SOURCE_LOCATION (decl),
614 "an address area attribute cannot be specified for "
615 "local variables");
616 *no_add_attrs = true;
618 area = ia64_get_addr_area (decl);
619 if (area != ADDR_AREA_NORMAL && addr_area != area)
621 error ("address area of %q+D conflicts with previous "
622 "declaration", decl);
623 *no_add_attrs = true;
625 break;
627 case FUNCTION_DECL:
628 error_at (DECL_SOURCE_LOCATION (decl),
629 "address area attribute cannot be specified for "
630 "functions");
631 *no_add_attrs = true;
632 break;
634 default:
635 warning (OPT_Wattributes, "%qE attribute ignored",
636 name);
637 *no_add_attrs = true;
638 break;
641 return NULL_TREE;
644 /* The section must have global and overlaid attributes. */
645 #define SECTION_VMS_OVERLAY SECTION_MACH_DEP
647 /* Part of the low level implementation of DEC Ada pragma Common_Object which
648 enables the shared use of variables stored in overlaid linker areas
649 corresponding to the use of Fortran COMMON. */
651 static tree
652 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
653 int flags ATTRIBUTE_UNUSED,
654 bool *no_add_attrs)
656 tree decl = *node;
657 tree id, val;
658 if (! DECL_P (decl))
659 abort ();
661 DECL_COMMON (decl) = 1;
662 id = TREE_VALUE (args);
663 if (TREE_CODE (id) == IDENTIFIER_NODE)
664 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
665 else if (TREE_CODE (id) == STRING_CST)
666 val = id;
667 else
669 warning (OPT_Wattributes,
670 "%qE attribute requires a string constant argument", name);
671 *no_add_attrs = true;
672 return NULL_TREE;
674 DECL_SECTION_NAME (decl) = val;
675 return NULL_TREE;
678 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
680 void
681 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
682 unsigned HOST_WIDE_INT size,
683 unsigned int align)
685 tree attr = DECL_ATTRIBUTES (decl);
 687   /* Since the common_object attribute sets DECL_SECTION_NAME, check that
 688      before looking up the attribute.  */
689 if (DECL_SECTION_NAME (decl) && attr)
690 attr = lookup_attribute ("common_object", attr);
691 else
692 attr = NULL_TREE;
694 if (!attr)
696 /* Code from elfos.h. */
697 fprintf (file, "%s", COMMON_ASM_OP);
698 assemble_name (file, name);
699 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
700 size, align / BITS_PER_UNIT);
702 else
704 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
705 ASM_OUTPUT_LABEL (file, name);
706 ASM_OUTPUT_SKIP (file, size ? size : 1);
710 /* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
712 void
713 ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
714 tree decl)
716 if (!(flags & SECTION_VMS_OVERLAY))
718 default_elf_asm_named_section (name, flags, decl);
719 return;
721 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
722 abort ();
724 if (flags & SECTION_DECLARED)
726 fprintf (asm_out_file, "\t.section\t%s\n", name);
727 return;
730 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
733 static void
734 ia64_encode_addr_area (tree decl, rtx symbol)
736 int flags;
738 flags = SYMBOL_REF_FLAGS (symbol);
739 switch (ia64_get_addr_area (decl))
741 case ADDR_AREA_NORMAL: break;
742 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
743 default: gcc_unreachable ();
745 SYMBOL_REF_FLAGS (symbol) = flags;
748 static void
749 ia64_encode_section_info (tree decl, rtx rtl, int first)
751 default_encode_section_info (decl, rtl, first);
753 /* Careful not to prod global register variables. */
754 if (TREE_CODE (decl) == VAR_DECL
755 && GET_CODE (DECL_RTL (decl)) == MEM
756 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
757 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
758 ia64_encode_addr_area (decl, XEXP (rtl, 0));
761 /* Return 1 if the operands of a move are ok. */
764 ia64_move_ok (rtx dst, rtx src)
766 /* If we're under init_recog_no_volatile, we'll not be able to use
767 memory_operand. So check the code directly and don't worry about
768 the validity of the underlying address, which should have been
769 checked elsewhere anyway. */
770 if (GET_CODE (dst) != MEM)
771 return 1;
772 if (GET_CODE (src) == MEM)
773 return 0;
774 if (register_operand (src, VOIDmode))
775 return 1;
 777   /* Otherwise, this must be a constant, and then either 0, 0.0 or 1.0.  */
778 if (INTEGRAL_MODE_P (GET_MODE (dst)))
779 return src == const0_rtx;
780 else
781 return satisfies_constraint_G (src);
784 /* Return 1 if the operands are ok for a floating point load pair. */
787 ia64_load_pair_ok (rtx dst, rtx src)
789 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
790 return 0;
791 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
792 return 0;
793 switch (GET_CODE (XEXP (src, 0)))
795 case REG:
796 case POST_INC:
797 break;
798 case POST_DEC:
799 return 0;
800 case POST_MODIFY:
802 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
804 if (GET_CODE (adjust) != CONST_INT
805 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
806 return 0;
808 break;
809 default:
810 abort ();
812 return 1;
816 addp4_optimize_ok (rtx op1, rtx op2)
818 return (basereg_operand (op1, GET_MODE(op1)) !=
819 basereg_operand (op2, GET_MODE(op2)));
822 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
823 Return the length of the field, or <= 0 on failure. */
826 ia64_depz_field_mask (rtx rop, rtx rshift)
828 unsigned HOST_WIDE_INT op = INTVAL (rop);
829 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
831 /* Get rid of the zero bits we're shifting in. */
832 op >>= shift;
834 /* We must now have a solid block of 1's at bit 0. */
835 return exact_log2 (op + 1);
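/* Worked example, for illustration: with ROP = 0xff00 and RSHIFT = 8,
   OP >>= SHIFT leaves 0xff and exact_log2 (0xff + 1) = 8, an 8-bit field.
   With ROP = 0xf0f00 the shifted value 0xf0f is not a solid block of 1's,
   0xf0f + 1 is not a power of two, and exact_log2 returns -1, signalling
   failure.  */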
838 /* Return the TLS model to use for ADDR. */
840 static enum tls_model
841 tls_symbolic_operand_type (rtx addr)
843 enum tls_model tls_kind = TLS_MODEL_NONE;
845 if (GET_CODE (addr) == CONST)
847 if (GET_CODE (XEXP (addr, 0)) == PLUS
848 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
849 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
851 else if (GET_CODE (addr) == SYMBOL_REF)
852 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
854 return tls_kind;
857 /* Return true if X is a constant that is valid for some immediate
858 field in an instruction. */
860 bool
861 ia64_legitimate_constant_p (rtx x)
863 switch (GET_CODE (x))
865 case CONST_INT:
866 case LABEL_REF:
867 return true;
869 case CONST_DOUBLE:
870 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
871 || GET_MODE (x) == DFmode)
872 return true;
873 return satisfies_constraint_G (x);
875 case CONST:
876 case SYMBOL_REF:
877 /* ??? Short term workaround for PR 28490. We must make the code here
878 match the code in ia64_expand_move and move_operand, even though they
879 are both technically wrong. */
880 if (tls_symbolic_operand_type (x) == 0)
882 HOST_WIDE_INT addend = 0;
883 rtx op = x;
885 if (GET_CODE (op) == CONST
886 && GET_CODE (XEXP (op, 0)) == PLUS
887 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
889 addend = INTVAL (XEXP (XEXP (op, 0), 1));
890 op = XEXP (XEXP (op, 0), 0);
893 if (any_offset_symbol_operand (op, GET_MODE (op))
894 || function_operand (op, GET_MODE (op)))
895 return true;
896 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
897 return (addend & 0x3fff) == 0;
898 return false;
900 return false;
902 case CONST_VECTOR:
904 enum machine_mode mode = GET_MODE (x);
906 if (mode == V2SFmode)
907 return satisfies_constraint_Y (x);
909 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
910 && GET_MODE_SIZE (mode) <= 8);
913 default:
914 return false;
918 /* Don't allow TLS addresses to get spilled to memory. */
920 static bool
921 ia64_cannot_force_const_mem (rtx x)
923 if (GET_MODE (x) == RFmode)
924 return true;
925 return tls_symbolic_operand_type (x) != 0;
928 /* Expand a symbolic constant load. */
930 bool
931 ia64_expand_load_address (rtx dest, rtx src)
933 gcc_assert (GET_CODE (dest) == REG);
935 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
936 having to pointer-extend the value afterward. Other forms of address
937 computation below are also more natural to compute as 64-bit quantities.
938 If we've been given an SImode destination register, change it. */
939 if (GET_MODE (dest) != Pmode)
940 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
941 byte_lowpart_offset (Pmode, GET_MODE (dest)));
943 if (TARGET_NO_PIC)
944 return false;
945 if (small_addr_symbolic_operand (src, VOIDmode))
946 return false;
948 if (TARGET_AUTO_PIC)
949 emit_insn (gen_load_gprel64 (dest, src));
950 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
951 emit_insn (gen_load_fptr (dest, src));
952 else if (sdata_symbolic_operand (src, VOIDmode))
953 emit_insn (gen_load_gprel (dest, src));
954 else
956 HOST_WIDE_INT addend = 0;
957 rtx tmp;
959 /* We did split constant offsets in ia64_expand_move, and we did try
960 to keep them split in move_operand, but we also allowed reload to
961 rematerialize arbitrary constants rather than spill the value to
962 the stack and reload it. So we have to be prepared here to split
963 them apart again. */
964 if (GET_CODE (src) == CONST)
966 HOST_WIDE_INT hi, lo;
968 hi = INTVAL (XEXP (XEXP (src, 0), 1));
969 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
970 hi = hi - lo;
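/* For illustration: an offset of 0x12345 has low 14 bits 0x2345, which
   sign-extend to lo = -0x1cbb, giving hi = 0x14000; 0x14000 + (-0x1cbb)
   recovers 0x12345, and the lo part fits a signed 14-bit add immediate.  */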
972 if (lo != 0)
974 addend = lo;
975 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
979 tmp = gen_rtx_HIGH (Pmode, src);
980 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
981 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
983 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
984 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
986 if (addend)
988 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
989 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
993 return true;
996 static GTY(()) rtx gen_tls_tga;
997 static rtx
998 gen_tls_get_addr (void)
1000 if (!gen_tls_tga)
1001 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1002 return gen_tls_tga;
1005 static GTY(()) rtx thread_pointer_rtx;
1006 static rtx
1007 gen_thread_pointer (void)
1009 if (!thread_pointer_rtx)
1010 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1011 return thread_pointer_rtx;
1014 static rtx
1015 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1016 rtx orig_op1, HOST_WIDE_INT addend)
1018 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1019 rtx orig_op0 = op0;
1020 HOST_WIDE_INT addend_lo, addend_hi;
1022 switch (tls_kind)
1024 case TLS_MODEL_GLOBAL_DYNAMIC:
1025 start_sequence ();
1027 tga_op1 = gen_reg_rtx (Pmode);
1028 emit_insn (gen_load_dtpmod (tga_op1, op1));
1030 tga_op2 = gen_reg_rtx (Pmode);
1031 emit_insn (gen_load_dtprel (tga_op2, op1));
1033 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1034 LCT_CONST, Pmode, 2, tga_op1,
1035 Pmode, tga_op2, Pmode);
1037 insns = get_insns ();
1038 end_sequence ();
1040 if (GET_MODE (op0) != Pmode)
1041 op0 = tga_ret;
1042 emit_libcall_block (insns, op0, tga_ret, op1);
1043 break;
1045 case TLS_MODEL_LOCAL_DYNAMIC:
1046     case TLS_MODEL_LOCAL_DYNAMIC:
1046       /* ??? This isn't the completely proper way to do local-dynamic.
1047 If the call to __tls_get_addr is used only by a single symbol,
1048 then we should (somehow) move the dtprel to the second arg
1049 to avoid the extra add. */
1050 start_sequence ();
1052 tga_op1 = gen_reg_rtx (Pmode);
1053 emit_insn (gen_load_dtpmod (tga_op1, op1));
1055 tga_op2 = const0_rtx;
1057 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1058 LCT_CONST, Pmode, 2, tga_op1,
1059 Pmode, tga_op2, Pmode);
1061 insns = get_insns ();
1062 end_sequence ();
1064 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1065 UNSPEC_LD_BASE);
1066 tmp = gen_reg_rtx (Pmode);
1067 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1069 if (!register_operand (op0, Pmode))
1070 op0 = gen_reg_rtx (Pmode);
1071 if (TARGET_TLS64)
1073 emit_insn (gen_load_dtprel (op0, op1));
1074 emit_insn (gen_adddi3 (op0, tmp, op0));
1076 else
1077 emit_insn (gen_add_dtprel (op0, op1, tmp));
1078 break;
1080 case TLS_MODEL_INITIAL_EXEC:
1081 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1082 addend_hi = addend - addend_lo;
1084 op1 = plus_constant (op1, addend_hi);
1085 addend = addend_lo;
1087 tmp = gen_reg_rtx (Pmode);
1088 emit_insn (gen_load_tprel (tmp, op1));
1090 if (!register_operand (op0, Pmode))
1091 op0 = gen_reg_rtx (Pmode);
1092 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1093 break;
1095 case TLS_MODEL_LOCAL_EXEC:
1096 if (!register_operand (op0, Pmode))
1097 op0 = gen_reg_rtx (Pmode);
1099 op1 = orig_op1;
1100 addend = 0;
1101 if (TARGET_TLS64)
1103 emit_insn (gen_load_tprel (op0, op1));
1104 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1106 else
1107 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1108 break;
1110 default:
1111 gcc_unreachable ();
1114 if (addend)
1115 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1116 orig_op0, 1, OPTAB_DIRECT);
1117 if (orig_op0 == op0)
1118 return NULL_RTX;
1119 if (GET_MODE (orig_op0) == Pmode)
1120 return op0;
1121 return gen_lowpart (GET_MODE (orig_op0), op0);
1125 ia64_expand_move (rtx op0, rtx op1)
1127 enum machine_mode mode = GET_MODE (op0);
1129 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1130 op1 = force_reg (mode, op1);
1132 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1134 HOST_WIDE_INT addend = 0;
1135 enum tls_model tls_kind;
1136 rtx sym = op1;
1138 if (GET_CODE (op1) == CONST
1139 && GET_CODE (XEXP (op1, 0)) == PLUS
1140 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1142 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1143 sym = XEXP (XEXP (op1, 0), 0);
1146 tls_kind = tls_symbolic_operand_type (sym);
1147 if (tls_kind)
1148 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1150 if (any_offset_symbol_operand (sym, mode))
1151 addend = 0;
1152 else if (aligned_offset_symbol_operand (sym, mode))
1154 HOST_WIDE_INT addend_lo, addend_hi;
1156 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1157 addend_hi = addend - addend_lo;
1159 if (addend_lo != 0)
1161 op1 = plus_constant (sym, addend_hi);
1162 addend = addend_lo;
1164 else
1165 addend = 0;
1167 else
1168 op1 = sym;
1170 if (reload_completed)
1172 /* We really should have taken care of this offset earlier. */
1173 gcc_assert (addend == 0);
1174 if (ia64_expand_load_address (op0, op1))
1175 return NULL_RTX;
1178 if (addend)
1180 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1182 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1184 op1 = expand_simple_binop (mode, PLUS, subtarget,
1185 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1186 if (op0 == op1)
1187 return NULL_RTX;
1191 return op1;
1194 /* Split a move from OP1 to OP0 conditional on COND. */
1196 void
1197 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1199 rtx insn, first = get_last_insn ();
1201 emit_move_insn (op0, op1);
1203 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1204 if (INSN_P (insn))
1205 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1206 PATTERN (insn));
1209 /* Split a post-reload TImode or TFmode reference into two DImode
1210 components. This is made extra difficult by the fact that we do
1211 not get any scratch registers to work with, because reload cannot
1212 be prevented from giving us a scratch that overlaps the register
1213 pair involved. So instead, when addressing memory, we tweak the
1214 pointer register up and back down with POST_INCs. Or up and not
1215 back down when we can get away with it.
1217 REVERSED is true when the loads must be done in reversed order
1218 (high word first) for correctness. DEAD is true when the pointer
1219 dies with the second insn we generate and therefore the second
1220 address must not carry a postmodify.
1222 May return an insn which is to be emitted after the moves. */
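/* For illustration: a TImode load through a base register that stays live
   is split into a low-word load whose address is a POST_INC of 8 (stepping
   the pointer up to the high word) and a high-word load whose address is a
   POST_DEC of 8 (stepping it back down).  When DEAD is true the POST_DEC is
   simply omitted, and in the reversed case an explicit add of 8 is emitted
   first, as the REG case below shows.  */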
1224 static rtx
1225 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1227 rtx fixup = 0;
1229 switch (GET_CODE (in))
1231 case REG:
1232 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1233 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1234 break;
1236 case CONST_INT:
1237 case CONST_DOUBLE:
1238 /* Cannot occur reversed. */
1239 gcc_assert (!reversed);
1241 if (GET_MODE (in) != TFmode)
1242 split_double (in, &out[0], &out[1]);
1243 else
1244 /* split_double does not understand how to split a TFmode
1245 quantity into a pair of DImode constants. */
1247 REAL_VALUE_TYPE r;
1248 unsigned HOST_WIDE_INT p[2];
1249 long l[4]; /* TFmode is 128 bits */
1251 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1252 real_to_target (l, &r, TFmode);
1254 if (FLOAT_WORDS_BIG_ENDIAN)
1256 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1257 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1259 else
1261 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1262 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1264 out[0] = GEN_INT (p[0]);
1265 out[1] = GEN_INT (p[1]);
1267 break;
1269 case MEM:
1271 rtx base = XEXP (in, 0);
1272 rtx offset;
1274 switch (GET_CODE (base))
1276 case REG:
1277 if (!reversed)
1279 out[0] = adjust_automodify_address
1280 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1281 out[1] = adjust_automodify_address
1282 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1284 else
1286 /* Reversal requires a pre-increment, which can only
1287 be done as a separate insn. */
1288 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1289 out[0] = adjust_automodify_address
1290 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1291 out[1] = adjust_address (in, DImode, 0);
1293 break;
1295 case POST_INC:
1296 gcc_assert (!reversed && !dead);
1298 /* Just do the increment in two steps. */
1299 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1300 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1301 break;
1303 case POST_DEC:
1304 gcc_assert (!reversed && !dead);
1306 /* Add 8, subtract 24. */
1307 base = XEXP (base, 0);
1308 out[0] = adjust_automodify_address
1309 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1310 out[1] = adjust_automodify_address
1311 (in, DImode,
1312 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1314 break;
1316 case POST_MODIFY:
1317 gcc_assert (!reversed && !dead);
1319 /* Extract and adjust the modification. This case is
1320 trickier than the others, because we might have an
1321 index register, or we might have a combined offset that
1322 doesn't fit a signed 9-bit displacement field. We can
1323 assume the incoming expression is already legitimate. */
1324 offset = XEXP (base, 1);
1325 base = XEXP (base, 0);
1327 out[0] = adjust_automodify_address
1328 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1330 if (GET_CODE (XEXP (offset, 1)) == REG)
1332 /* Can't adjust the postmodify to match. Emit the
1333 original, then a separate addition insn. */
1334 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1335 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1337 else
1339 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1340 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1342 /* Again the postmodify cannot be made to match,
1343 but in this case it's more efficient to get rid
1344 of the postmodify entirely and fix up with an
1345 add insn. */
1346 out[1] = adjust_automodify_address (in, DImode, base, 8);
1347 fixup = gen_adddi3
1348 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1350 else
1352 /* Combined offset still fits in the displacement field.
1353 (We cannot overflow it at the high end.) */
1354 out[1] = adjust_automodify_address
1355 (in, DImode, gen_rtx_POST_MODIFY
1356 (Pmode, base, gen_rtx_PLUS
1357 (Pmode, base,
1358 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1362 break;
1364 default:
1365 gcc_unreachable ();
1367 break;
1370 default:
1371 gcc_unreachable ();
1374 return fixup;
1377 /* Split a TImode or TFmode move instruction after reload.
1378 This is used by *movtf_internal and *movti_internal. */
1379 void
1380 ia64_split_tmode_move (rtx operands[])
1382 rtx in[2], out[2], insn;
1383 rtx fixup[2];
1384 bool dead = false;
1385 bool reversed = false;
1387 /* It is possible for reload to decide to overwrite a pointer with
1388 the value it points to. In that case we have to do the loads in
1389 the appropriate order so that the pointer is not destroyed too
1390 early. Also we must not generate a postmodify for that second
1391 load, or rws_access_regno will die. */
1392 if (GET_CODE (operands[1]) == MEM
1393 && reg_overlap_mentioned_p (operands[0], operands[1]))
1395 rtx base = XEXP (operands[1], 0);
1396 while (GET_CODE (base) != REG)
1397 base = XEXP (base, 0);
1399 if (REGNO (base) == REGNO (operands[0]))
1400 reversed = true;
1401 dead = true;
1403 /* Another reason to do the moves in reversed order is if the first
1404 element of the target register pair is also the second element of
1405 the source register pair. */
1406 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1407 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1408 reversed = true;
1410 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1411 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1413 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1414 if (GET_CODE (EXP) == MEM \
1415 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1416 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1417 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1418 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
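/* A REG_INC note records that the named register is modified by an
   auto-increment or auto-decrement address within the insn, so passes that
   track register side effects see the update made by the post-modify
   addresses built above.  */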
1420 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1421 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1422 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1424 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1425 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1426 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1428 if (fixup[0])
1429 emit_insn (fixup[0]);
1430 if (fixup[1])
1431 emit_insn (fixup[1]);
1433 #undef MAYBE_ADD_REG_INC_NOTE
1436 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1437 through memory plus an extra GR scratch register. Except that you can
1438 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1439 SECONDARY_RELOAD_CLASS, but not both.
1441 We got into problems in the first place by allowing a construct like
1442 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1443 This solution attempts to prevent this situation from occurring. When
1444 we see something like the above, we spill the inner register to memory. */
1446 static rtx
1447 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1449 if (GET_CODE (in) == SUBREG
1450 && GET_MODE (SUBREG_REG (in)) == TImode
1451 && GET_CODE (SUBREG_REG (in)) == REG)
1453 rtx memt = assign_stack_temp (TImode, 16, 0);
1454 emit_move_insn (memt, SUBREG_REG (in));
1455 return adjust_address (memt, mode, 0);
1457 else if (force && GET_CODE (in) == REG)
1459 rtx memx = assign_stack_temp (mode, 16, 0);
1460 emit_move_insn (memx, in);
1461 return memx;
1463 else
1464 return in;
1467 /* Expand the movxf or movrf pattern (MODE says which) with the given
1468 OPERANDS, returning true if the pattern should then invoke
1469 DONE. */
1471 bool
1472 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1474 rtx op0 = operands[0];
1476 if (GET_CODE (op0) == SUBREG)
1477 op0 = SUBREG_REG (op0);
1479 /* We must support XFmode loads into general registers for stdarg/vararg,
1480 unprototyped calls, and a rare case where a long double is passed as
1481 an argument after a float HFA fills the FP registers. We split them into
1482 DImode loads for convenience. We also need to support XFmode stores
1483 for the last case. This case does not happen for stdarg/vararg routines,
1484 because we do a block store to memory of unnamed arguments. */
1486 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1488 rtx out[2];
1490 /* We're hoping to transform everything that deals with XFmode
1491 quantities and GR registers early in the compiler. */
1492 gcc_assert (can_create_pseudo_p ());
1494 /* Struct to register can just use TImode instead. */
1495 if ((GET_CODE (operands[1]) == SUBREG
1496 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1497 || (GET_CODE (operands[1]) == REG
1498 && GR_REGNO_P (REGNO (operands[1]))))
1500 rtx op1 = operands[1];
1502 if (GET_CODE (op1) == SUBREG)
1503 op1 = SUBREG_REG (op1);
1504 else
1505 op1 = gen_rtx_REG (TImode, REGNO (op1));
1507 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1508 return true;
1511 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1513 /* Don't word-swap when reading in the constant. */
1514 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1515 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1516 0, mode));
1517 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1518 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1519 0, mode));
1520 return true;
1523 /* If the quantity is in a register not known to be GR, spill it. */
1524 if (register_operand (operands[1], mode))
1525 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1527 gcc_assert (GET_CODE (operands[1]) == MEM);
1529 /* Don't word-swap when reading in the value. */
1530 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1531 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1533 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1534 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1535 return true;
1538 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1540 /* We're hoping to transform everything that deals with XFmode
1541 quantities and GR registers early in the compiler. */
1542 gcc_assert (can_create_pseudo_p ());
1544 /* Op0 can't be a GR_REG here, as that case is handled above.
1545 If op0 is a register, then we spill op1, so that we now have a
1546 MEM operand. This requires creating an XFmode subreg of a TImode reg
1547 to force the spill. */
1548 if (register_operand (operands[0], mode))
1550 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1551 op1 = gen_rtx_SUBREG (mode, op1, 0);
1552 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1555 else
1557 rtx in[2];
1559 gcc_assert (GET_CODE (operands[0]) == MEM);
1561 /* Don't word-swap when writing out the value. */
1562 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1563 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1565 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1566 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1567 return true;
1571 if (!reload_in_progress && !reload_completed)
1573 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1575 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1577 rtx memt, memx, in = operands[1];
1578 if (CONSTANT_P (in))
1579 in = validize_mem (force_const_mem (mode, in));
1580 if (GET_CODE (in) == MEM)
1581 memt = adjust_address (in, TImode, 0);
1582 else
1584 memt = assign_stack_temp (TImode, 16, 0);
1585 memx = adjust_address (memt, mode, 0);
1586 emit_move_insn (memx, in);
1588 emit_move_insn (op0, memt);
1589 return true;
1592 if (!ia64_move_ok (operands[0], operands[1]))
1593 operands[1] = force_reg (mode, operands[1]);
1596 return false;
1599 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600 with the expression that holds the compare result (in VOIDmode). */
1602 static GTY(()) rtx cmptf_libfunc;
1604 void
1605 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1607 enum rtx_code code = GET_CODE (*expr);
1608 rtx cmp;
1610 /* If we have a BImode input, then we already have a compare result, and
1611 do not need to emit another comparison. */
1612 if (GET_MODE (*op0) == BImode)
1614 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1615 cmp = *op0;
1617 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1618      magic number as its third argument that indicates what to do.
1619 The return value is an integer to be compared against zero. */
1620 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1622 enum qfcmp_magic {
1623 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1624 QCMP_UNORD = 2,
1625 QCMP_EQ = 4,
1626 QCMP_LT = 8,
1627 QCMP_GT = 16
1629 int magic;
1630 enum rtx_code ncode;
1631 rtx ret, insns;
1633 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1634 switch (code)
1636 /* 1 = equal, 0 = not equal. Equality operators do
1637 not raise FP_INVALID when given an SNaN operand. */
1638 case EQ: magic = QCMP_EQ; ncode = NE; break;
1639 case NE: magic = QCMP_EQ; ncode = EQ; break;
1640 /* isunordered() from C99. */
1641 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1642 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1643 /* Relational operators raise FP_INVALID when given
1644 an SNaN operand. */
1645 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1646 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1647 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1648 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1649 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1650          Expanders for buneq etc. would have to be added to ia64.md
1651 for this to be useful. */
1652 default: gcc_unreachable ();
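/* For example, an LE comparison is lowered to a call
   _U_Qfcmp (*op0, *op1, QCMP_LT | QCMP_EQ | QCMP_INV), i.e. magic 13,
   and the BImode result below tests the returned integer with NE against
   zero.  */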
1655 start_sequence ();
1657 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1658 *op0, TFmode, *op1, TFmode,
1659 GEN_INT (magic), DImode);
1660 cmp = gen_reg_rtx (BImode);
1661 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1662 gen_rtx_fmt_ee (ncode, BImode,
1663 ret, const0_rtx)));
1665 insns = get_insns ();
1666 end_sequence ();
1668 emit_libcall_block (insns, cmp, cmp,
1669 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1670 code = NE;
1672 else
1674 cmp = gen_reg_rtx (BImode);
1675 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1676 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1677 code = NE;
1680 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1681 *op0 = cmp;
1682 *op1 = const0_rtx;
1685 /* Generate an integral vector comparison. Return true if the condition has
1686 been reversed, and so the sense of the comparison should be inverted. */
1688 static bool
1689 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1690 rtx dest, rtx op0, rtx op1)
1692 bool negate = false;
1693 rtx x;
1695 /* Canonicalize the comparison to EQ, GT, GTU. */
1696 switch (code)
1698 case EQ:
1699 case GT:
1700 case GTU:
1701 break;
1703 case NE:
1704 case LE:
1705 case LEU:
1706 code = reverse_condition (code);
1707 negate = true;
1708 break;
1710 case GE:
1711 case GEU:
1712 code = reverse_condition (code);
1713 negate = true;
1714 /* FALLTHRU */
1716 case LT:
1717 case LTU:
1718 code = swap_condition (code);
1719 x = op0, op0 = op1, op1 = x;
1720 break;
1722 default:
1723 gcc_unreachable ();
1726 /* Unsigned parallel compare is not supported by the hardware. Play some
1727 tricks to turn this into a signed comparison against 0. */
1728 if (code == GTU)
1730 switch (mode)
1732 case V2SImode:
1734 rtx t1, t2, mask;
1736 /* Subtract (-(INT MAX) - 1) from both operands to make
1737 them signed. */
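/* For illustration: 0xffffffff >u 0x00000001 becomes, after the bias,
   0x7fffffff > 0x80000001 when viewed as signed words, i.e.
   INT_MAX > INT_MIN + 1, which is true and matches the unsigned result.  */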
1738 mask = GEN_INT (0x80000000);
1739 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1740 mask = force_reg (mode, mask);
1741 t1 = gen_reg_rtx (mode);
1742 emit_insn (gen_subv2si3 (t1, op0, mask));
1743 t2 = gen_reg_rtx (mode);
1744 emit_insn (gen_subv2si3 (t2, op1, mask));
1745 op0 = t1;
1746 op1 = t2;
1747 code = GT;
1749 break;
1751 case V8QImode:
1752 case V4HImode:
1753 /* Perform a parallel unsigned saturating subtraction. */
1754 x = gen_reg_rtx (mode);
1755 emit_insn (gen_rtx_SET (VOIDmode, x,
1756 gen_rtx_US_MINUS (mode, op0, op1)));
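/* The saturating difference op0 -us op1 is zero exactly when op0 <=u op1,
   so comparing it for equality with zero computes the inverse of GTU;
   flipping NEGATE below restores the intended sense.  */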
1758 code = EQ;
1759 op0 = x;
1760 op1 = CONST0_RTX (mode);
1761 negate = !negate;
1762 break;
1764 default:
1765 gcc_unreachable ();
1769 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1770 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1772 return negate;
1775 /* Emit an integral vector conditional move. */
1777 void
1778 ia64_expand_vecint_cmov (rtx operands[])
1780 enum machine_mode mode = GET_MODE (operands[0]);
1781 enum rtx_code code = GET_CODE (operands[3]);
1782 bool negate;
1783 rtx cmp, x, ot, of;
1785 cmp = gen_reg_rtx (mode);
1786 negate = ia64_expand_vecint_compare (code, mode, cmp,
1787 operands[4], operands[5]);
1789 ot = operands[1+negate];
1790 of = operands[2-negate];
1792 if (ot == CONST0_RTX (mode))
1794 if (of == CONST0_RTX (mode))
1796 emit_move_insn (operands[0], ot);
1797 return;
1800 x = gen_rtx_NOT (mode, cmp);
1801 x = gen_rtx_AND (mode, x, of);
1802 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1804 else if (of == CONST0_RTX (mode))
1806 x = gen_rtx_AND (mode, cmp, ot);
1807 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1809 else
1811 rtx t, f;
1813 t = gen_reg_rtx (mode);
1814 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1815 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1817 f = gen_reg_rtx (mode);
1818 x = gen_rtx_NOT (mode, cmp);
1819 x = gen_rtx_AND (mode, x, operands[2-negate]);
1820 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1822 x = gen_rtx_IOR (mode, t, f);
1823 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1827 /* Emit an integral vector min or max operation. Return true if all done. */
1829 bool
1830 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1831 rtx operands[])
1833 rtx xops[6];
1835 /* These four combinations are supported directly. */
1836 if (mode == V8QImode && (code == UMIN || code == UMAX))
1837 return false;
1838 if (mode == V4HImode && (code == SMIN || code == SMAX))
1839 return false;
1841 /* This combination can be implemented with only saturating subtraction. */
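  /* Sketch of the identity used here: a -us b is MAX (a - b, 0) for
     unsigned elements, so (a -us b) + b == MAX (a, b).  */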
1842 if (mode == V4HImode && code == UMAX)
1844 rtx x, tmp = gen_reg_rtx (mode);
1846 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1849 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1850 return true;
1853 /* Everything else is implemented via vector comparisons. */
1854 xops[0] = operands[0];
1855 xops[4] = xops[1] = operands[1];
1856 xops[5] = xops[2] = operands[2];
1858 switch (code)
1860 case UMIN:
1861 code = LTU;
1862 break;
1863 case UMAX:
1864 code = GTU;
1865 break;
1866 case SMIN:
1867 code = LT;
1868 break;
1869 case SMAX:
1870 code = GT;
1871 break;
1872 default:
1873 gcc_unreachable ();
1875 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1877 ia64_expand_vecint_cmov (xops);
1878 return true;
1881 /* Emit an integral vector widening sum operation. */
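/* A sketch of the expansion below, for V8QI -> V4HI (V4HI -> V2SI is
   analogous): X is the per-element sign mask (zero when unsigned, the
   result of op1 < 0 when signed), unpack_l/unpack_h interleave op1 with
   X to widen the low and high halves, and two adds fold them into the
   accumulator:

     l = unpack_l (op1, x);  h = unpack_h (op1, x);
     operands[0] = h + (l + operands[2]);  */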
1883 void
1884 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1886 rtx l, h, x, s;
1887 enum machine_mode wmode, mode;
1888 rtx (*unpack_l) (rtx, rtx, rtx);
1889 rtx (*unpack_h) (rtx, rtx, rtx);
1890 rtx (*plus) (rtx, rtx, rtx);
1892 wmode = GET_MODE (operands[0]);
1893 mode = GET_MODE (operands[1]);
1895 switch (mode)
1897 case V8QImode:
1898 unpack_l = gen_unpack1_l;
1899 unpack_h = gen_unpack1_h;
1900 plus = gen_addv4hi3;
1901 break;
1902 case V4HImode:
1903 unpack_l = gen_unpack2_l;
1904 unpack_h = gen_unpack2_h;
1905 plus = gen_addv2si3;
1906 break;
1907 default:
1908 gcc_unreachable ();
1911 /* Fill in x with the sign extension of each element in op1. */
1912 if (unsignedp)
1913 x = CONST0_RTX (mode);
1914 else
1916 bool neg;
1918 x = gen_reg_rtx (mode);
1920 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1921 CONST0_RTX (mode));
1922 gcc_assert (!neg);
1925 l = gen_reg_rtx (wmode);
1926 h = gen_reg_rtx (wmode);
1927 s = gen_reg_rtx (wmode);
1929 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931 emit_insn (plus (s, l, operands[2]));
1932 emit_insn (plus (operands[0], h, s));
1935 /* Emit a signed or unsigned V8QI dot product operation. */
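/* A sketch of the expansion below: both V8QI inputs are widened to V4HI
   (low and high halves), multiplied pairwise into four V2SI partial
   products with pmpy2, and summed together with the V2SI accumulator:

     operands[0] = (p3 + p4) + ((p1 + p2) + operands[3]);  */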
1937 void
1938 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1940 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1942 /* Fill in x1 and x2 with the sign extension of each element. */
1943 if (unsignedp)
1944 x1 = x2 = CONST0_RTX (V8QImode);
1945 else
1947 bool neg;
1949 x1 = gen_reg_rtx (V8QImode);
1950 x2 = gen_reg_rtx (V8QImode);
1952 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1953 CONST0_RTX (V8QImode));
1954 gcc_assert (!neg);
1955 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1956 CONST0_RTX (V8QImode));
1957 gcc_assert (!neg);
1960 l1 = gen_reg_rtx (V4HImode);
1961 l2 = gen_reg_rtx (V4HImode);
1962 h1 = gen_reg_rtx (V4HImode);
1963 h2 = gen_reg_rtx (V4HImode);
1965 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1966 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1967 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1968 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1970 p1 = gen_reg_rtx (V2SImode);
1971 p2 = gen_reg_rtx (V2SImode);
1972 p3 = gen_reg_rtx (V2SImode);
1973 p4 = gen_reg_rtx (V2SImode);
1974 emit_insn (gen_pmpy2_r (p1, l1, l2));
1975 emit_insn (gen_pmpy2_l (p2, l1, l2));
1976 emit_insn (gen_pmpy2_r (p3, h1, h2));
1977 emit_insn (gen_pmpy2_l (p4, h1, h2));
1979 s1 = gen_reg_rtx (V2SImode);
1980 s2 = gen_reg_rtx (V2SImode);
1981 s3 = gen_reg_rtx (V2SImode);
1982 emit_insn (gen_addv2si3 (s1, p1, p2));
1983 emit_insn (gen_addv2si3 (s2, p3, p4));
1984 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1985 emit_insn (gen_addv2si3 (operands[0], s2, s3));
1988 /* Emit the appropriate sequence for a call. */
1990 void
1991 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1992 int sibcall_p)
1994 rtx insn, b0;
1996 addr = XEXP (addr, 0);
1997 addr = convert_memory_address (DImode, addr);
1998 b0 = gen_rtx_REG (DImode, R_BR (0));
2000 /* ??? Should do this for functions known to bind local too. */
2001 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2003 if (sibcall_p)
2004 insn = gen_sibcall_nogp (addr);
2005 else if (! retval)
2006 insn = gen_call_nogp (addr, b0);
2007 else
2008 insn = gen_call_value_nogp (retval, addr, b0);
2009 insn = emit_call_insn (insn);
2011 else
2013 if (sibcall_p)
2014 insn = gen_sibcall_gp (addr);
2015 else if (! retval)
2016 insn = gen_call_gp (addr, b0);
2017 else
2018 insn = gen_call_value_gp (retval, addr, b0);
2019 insn = emit_call_insn (insn);
2021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2024 if (sibcall_p)
2025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2027 if (TARGET_ABI_OPEN_VMS)
2028 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2029 gen_rtx_REG (DImode, GR_REG (25)));
2032 static void
2033 reg_emitted (enum ia64_frame_regs r)
2035 if (emitted_frame_related_regs[r] == 0)
2036 emitted_frame_related_regs[r] = current_frame_info.r[r];
2037 else
2038 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2041 static int
2042 get_reg (enum ia64_frame_regs r)
2044 reg_emitted (r);
2045 return current_frame_info.r[r];
2048 static bool
2049 is_emitted (int regno)
2051 unsigned int r;
2053 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2054 if (emitted_frame_related_regs[r] == regno)
2055 return true;
2056 return false;
2059 void
2060 ia64_reload_gp (void)
2062 rtx tmp;
2064 if (current_frame_info.r[reg_save_gp])
2066 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2068 else
2070 HOST_WIDE_INT offset;
2071 rtx offset_r;
2073 offset = (current_frame_info.spill_cfa_off
2074 + current_frame_info.spill_size);
2075 if (frame_pointer_needed)
2077 tmp = hard_frame_pointer_rtx;
2078 offset = -offset;
2080 else
2082 tmp = stack_pointer_rtx;
2083 offset = current_frame_info.total_size - offset;
2086 offset_r = GEN_INT (offset);
2087 if (satisfies_constraint_I (offset_r))
2088 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2089 else
2091 emit_move_insn (pic_offset_table_rtx, offset_r);
2092 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2093 pic_offset_table_rtx, tmp));
2096 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2099 emit_move_insn (pic_offset_table_rtx, tmp);
2102 void
2103 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2104 rtx scratch_b, int noreturn_p, int sibcall_p)
2106 rtx insn;
2107 bool is_desc = false;
2109 /* If we find we're calling through a register, then we're actually
2110 calling through a descriptor, so load up the values. */
2111 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2113 rtx tmp;
2114 bool addr_dead_p;
2116 /* ??? We are currently constrained to *not* use peep2, because
2117 we can legitimately change the global lifetime of the GP
2118 (in the form of killing where previously live). This is
2119 because a call through a descriptor doesn't use the previous
2120 value of the GP, while a direct call does, and we do not
2121 commit to either form until the split here.
2123 That said, this means that we lack precise life info for
2124 whether ADDR is dead after this call. This is not terribly
2125 important, since we can fix things up essentially for free
2126 with the POST_DEC below, but it's nice to not use it when we
2127 can immediately tell it's not necessary. */
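      /* In other words, what follows loads the two words of the function
         descriptor, roughly

            code = *addr++;    scratch_b = code;
            gp   = *addr;      addr-- unless ADDR is dead;

         with the post-increment/post-decrement folded into the memory
         operands below.  */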
2128 addr_dead_p = ((noreturn_p || sibcall_p
2129 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2130 REGNO (addr)))
2131 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2133 /* Load the code address into scratch_b. */
2134 tmp = gen_rtx_POST_INC (Pmode, addr);
2135 tmp = gen_rtx_MEM (Pmode, tmp);
2136 emit_move_insn (scratch_r, tmp);
2137 emit_move_insn (scratch_b, scratch_r);
2139 /* Load the GP address. If ADDR is not dead here, then we must
2140 revert the change made above via the POST_INCREMENT. */
2141 if (!addr_dead_p)
2142 tmp = gen_rtx_POST_DEC (Pmode, addr);
2143 else
2144 tmp = addr;
2145 tmp = gen_rtx_MEM (Pmode, tmp);
2146 emit_move_insn (pic_offset_table_rtx, tmp);
2148 is_desc = true;
2149 addr = scratch_b;
2152 if (sibcall_p)
2153 insn = gen_sibcall_nogp (addr);
2154 else if (retval)
2155 insn = gen_call_value_nogp (retval, addr, retaddr);
2156 else
2157 insn = gen_call_nogp (addr, retaddr);
2158 emit_call_insn (insn);
2160 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2161 ia64_reload_gp ();
2164 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2166 This differs from the generic code in that we know about the zero-extending
2167 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2168 also know that ld.acq+cmpxchg.rel equals a full barrier.
2170 The loop we want to generate looks like
2172 cmp_reg = mem;
2173 label:
2174 old_reg = cmp_reg;
2175 new_reg = cmp_reg op val;
2176 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2177 if (cmp_reg != old_reg)
2178 goto label;
2180 Note that we only do the plain load from memory once. Subsequent
2181 iterations use the value loaded by the compare-and-swap pattern. */
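/* For a DImode operand this lands in the assembly roughly as the sketch
   below (symbolic register names, not the exact output):

       ld8.acq       cmp = [mem]
     retry:
       mov           old = cmp
       mov           ar.ccv = cmp
       <op>          new = cmp, val
       cmpxchg8.rel  cmp = [mem], new, ar.ccv
       cmp.ne        p6, p7 = cmp, old
  (p6) br.cond.spnt  retry  */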
2183 void
2184 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2185 rtx old_dst, rtx new_dst)
2187 enum machine_mode mode = GET_MODE (mem);
2188 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2189 enum insn_code icode;
2191 /* Special case for using fetchadd. */
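  /* fetchadd only accepts a small set of immediate increments
     (+-1, +-4, +-8, +-16, per fetchadd_operand), so anything else falls
     through to the generic cmpxchg loop below.  */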
2192 if ((mode == SImode || mode == DImode)
2193 && (code == PLUS || code == MINUS)
2194 && fetchadd_operand (val, mode))
2196 if (code == MINUS)
2197 val = GEN_INT (-INTVAL (val));
2199 if (!old_dst)
2200 old_dst = gen_reg_rtx (mode);
2202 emit_insn (gen_memory_barrier ());
2204 if (mode == SImode)
2205 icode = CODE_FOR_fetchadd_acq_si;
2206 else
2207 icode = CODE_FOR_fetchadd_acq_di;
2208 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2210 if (new_dst)
2212 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2213 true, OPTAB_WIDEN);
2214 if (new_reg != new_dst)
2215 emit_move_insn (new_dst, new_reg);
2217 return;
2220 /* Because of the volatile mem read, we get an ld.acq, which is the
2221 front half of the full barrier. The end half is the cmpxchg.rel. */
2222 gcc_assert (MEM_VOLATILE_P (mem));
2224 old_reg = gen_reg_rtx (DImode);
2225 cmp_reg = gen_reg_rtx (DImode);
2226 label = gen_label_rtx ();
2228 if (mode != DImode)
2230 val = simplify_gen_subreg (DImode, val, mode, 0);
2231 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2233 else
2234 emit_move_insn (cmp_reg, mem);
2236 emit_label (label);
2238 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2239 emit_move_insn (old_reg, cmp_reg);
2240 emit_move_insn (ar_ccv, cmp_reg);
2242 if (old_dst)
2243 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2245 new_reg = cmp_reg;
2246 if (code == NOT)
2248 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2249 true, OPTAB_DIRECT);
2250 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2252 else
2253 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2254 true, OPTAB_DIRECT);
2256 if (mode != DImode)
2257 new_reg = gen_lowpart (mode, new_reg);
2258 if (new_dst)
2259 emit_move_insn (new_dst, new_reg);
2261 switch (mode)
2263 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2264 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2265 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2266 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2267 default:
2268 gcc_unreachable ();
2271 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2273 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2276 /* Begin the assembly file. */
2278 static void
2279 ia64_file_start (void)
2281 /* Variable tracking should be run after all optimizations which change order
2282 of insns. It also needs a valid CFG. This can't be done in
2283 ia64_override_options, because flag_var_tracking is finalized after
2284 that. */
2285 ia64_flag_var_tracking = flag_var_tracking;
2286 flag_var_tracking = 0;
2288 default_file_start ();
2289 emit_safe_across_calls ();
2292 void
2293 emit_safe_across_calls (void)
2295 unsigned int rs, re;
2296 int out_state;
2298 rs = 1;
2299 out_state = 0;
2300 while (1)
2302 while (rs < 64 && call_used_regs[PR_REG (rs)])
2303 rs++;
2304 if (rs >= 64)
2305 break;
2306 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2307 continue;
2308 if (out_state == 0)
2310 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2311 out_state = 1;
2313 else
2314 fputc (',', asm_out_file);
2315 if (re == rs + 1)
2316 fprintf (asm_out_file, "p%u", rs);
2317 else
2318 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2319 rs = re + 1;
2321 if (out_state)
2322 fputc ('\n', asm_out_file);
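  /* For reference, the loop above typically boils down to a single
     directive such as

       .pred.safe_across_calls p1-p5,p16-p63

     with the exact ranges depending on which predicate registers are
     call-used in the current configuration.  */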
2325 /* Globalize a declaration. */
2327 static void
2328 ia64_globalize_decl_name (FILE * stream, tree decl)
2330 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2331 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2332 if (version_attr)
2334 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2335 const char *p = TREE_STRING_POINTER (v);
2336 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2338 targetm.asm_out.globalize_label (stream, name);
2339 if (TREE_CODE (decl) == FUNCTION_DECL)
2340 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
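  /* Example (names purely illustrative): for a function foo declared with
     __attribute__ ((version_id ("1.0"))), the fprintf above emits

       .alias foo#, "foo{1.0}"

     followed by the usual globalize and .type directives.  */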
2343 /* Helper function for ia64_compute_frame_size: find an appropriate general
2344 register to spill some special register to.  Registers already allocated
2345 by this routine are tracked in current_frame_info and are not reused.
2346 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2348 static int
2349 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2351 int regno;
2353 if (emitted_frame_related_regs[r] != 0)
2355 regno = emitted_frame_related_regs[r];
2356 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2357 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2358 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359 else if (current_function_is_leaf
2360 && regno >= GR_REG (1) && regno <= GR_REG (31))
2361 current_frame_info.gr_used_mask |= 1 << regno;
2363 return regno;
2366 /* If this is a leaf function, first try an otherwise unused
2367 call-clobbered register. */
2368 if (current_function_is_leaf)
2370 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2371 if (! df_regs_ever_live_p (regno)
2372 && call_used_regs[regno]
2373 && ! fixed_regs[regno]
2374 && ! global_regs[regno]
2375 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2376 && ! is_emitted (regno))
2378 current_frame_info.gr_used_mask |= 1 << regno;
2379 return regno;
2383 if (try_locals)
2385 regno = current_frame_info.n_local_regs;
2386 /* If there is a frame pointer, then we can't use loc79, because
2387 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2388 reg_name switching code in ia64_expand_prologue. */
2389 while (regno < (80 - frame_pointer_needed))
2390 if (! is_emitted (LOC_REG (regno++)))
2392 current_frame_info.n_local_regs = regno;
2393 return LOC_REG (regno - 1);
2397 /* Failed to find a general register to spill to. Must use stack. */
2398 return 0;
2401 /* In order to make for nice schedules, we try to allocate every temporary
2402 to a different register. We must of course stay away from call-saved,
2403 fixed, and global registers. We must also stay away from registers
2404 allocated in current_frame_info.gr_used_mask, since those include regs
2405 used all through the prologue.
2407 Any register allocated here must be used immediately. The idea is to
2408 aid scheduling, not to solve data flow problems. */
2410 static int last_scratch_gr_reg;
2412 static int
2413 next_scratch_gr_reg (void)
2415 int i, regno;
2417 for (i = 0; i < 32; ++i)
2419 regno = (last_scratch_gr_reg + i + 1) & 31;
2420 if (call_used_regs[regno]
2421 && ! fixed_regs[regno]
2422 && ! global_regs[regno]
2423 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2425 last_scratch_gr_reg = regno;
2426 return regno;
2430 /* There must be _something_ available. */
2431 gcc_unreachable ();
2434 /* Helper function for ia64_compute_frame_size, called through
2435 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2437 static void
2438 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2440 unsigned int regno = REGNO (reg);
2441 if (regno < 32)
2443 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2444 for (i = 0; i < n; ++i)
2445 current_frame_info.gr_used_mask |= 1 << (regno + i);
2450 /* Compute the frame layout for the current function and record it in
2451 current_frame_info.  SIZE is the number of bytes of space
2452 needed for local variables. */
2454 static void
2455 ia64_compute_frame_size (HOST_WIDE_INT size)
2457 HOST_WIDE_INT total_size;
2458 HOST_WIDE_INT spill_size = 0;
2459 HOST_WIDE_INT extra_spill_size = 0;
2460 HOST_WIDE_INT pretend_args_size;
2461 HARD_REG_SET mask;
2462 int n_spilled = 0;
2463 int spilled_gr_p = 0;
2464 int spilled_fr_p = 0;
2465 unsigned int regno;
2466 int min_regno;
2467 int max_regno;
2468 int i;
2470 if (current_frame_info.initialized)
2471 return;
2473 memset (&current_frame_info, 0, sizeof current_frame_info);
2474 CLEAR_HARD_REG_SET (mask);
2476 /* Don't allocate scratches to the return register. */
2477 diddle_return_value (mark_reg_gr_used_mask, NULL);
2479 /* Don't allocate scratches to the EH scratch registers. */
2480 if (cfun->machine->ia64_eh_epilogue_sp)
2481 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482 if (cfun->machine->ia64_eh_epilogue_bsp)
2483 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2485 /* Find the size of the register stack frame. We have only 80 local
2486 registers, because we reserve 8 for the inputs and 8 for the
2487 outputs. */
2489 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490 since we'll be adjusting that down later. */
2491 regno = LOC_REG (78) + ! frame_pointer_needed;
2492 for (; regno >= LOC_REG (0); regno--)
2493 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2494 break;
2495 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2497 /* For functions marked with the syscall_linkage attribute, we must mark
2498 all eight input registers as in use, so that locals aren't visible to
2499 the caller. */
2501 if (cfun->machine->n_varargs > 0
2502 || lookup_attribute ("syscall_linkage",
2503 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2504 current_frame_info.n_input_regs = 8;
2505 else
2507 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2508 if (df_regs_ever_live_p (regno))
2509 break;
2510 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2513 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2514 if (df_regs_ever_live_p (regno))
2515 break;
2516 i = regno - OUT_REG (0) + 1;
2518 #ifndef PROFILE_HOOK
2519 /* When -p profiling, we need one output register for the mcount argument.
2520 Likewise for -a profiling for the bb_init_func argument. For -ax
2521 profiling, we need two output registers for the two bb_init_trace_func
2522 arguments. */
2523 if (crtl->profile)
2524 i = MAX (i, 1);
2525 #endif
2526 current_frame_info.n_output_regs = i;
2528 /* ??? No rotating register support yet. */
2529 current_frame_info.n_rotate_regs = 0;
2531 /* Discover which registers need spilling, and how much room that
2532 will take. Begin with floating point and general registers,
2533 which will always wind up on the stack. */
2535 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2536 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2538 SET_HARD_REG_BIT (mask, regno);
2539 spill_size += 16;
2540 n_spilled += 1;
2541 spilled_fr_p = 1;
2544 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2545 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2547 SET_HARD_REG_BIT (mask, regno);
2548 spill_size += 8;
2549 n_spilled += 1;
2550 spilled_gr_p = 1;
2553 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2554 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2556 SET_HARD_REG_BIT (mask, regno);
2557 spill_size += 8;
2558 n_spilled += 1;
2561 /* Now come all special registers that might get saved in other
2562 general registers. */
2564 if (frame_pointer_needed)
2566 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2567 /* If we did not get a register, then we take LOC79. This is guaranteed
2568 to be free, even if regs_ever_live is already set, because this is
2569 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2570 as we don't count loc79 above. */
2571 if (current_frame_info.r[reg_fp] == 0)
2573 current_frame_info.r[reg_fp] = LOC_REG (79);
2574 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2578 if (! current_function_is_leaf)
2580 /* Emit a save of BR0 if we call other functions. Do this even
2581 if this function doesn't return, as EH depends on this to be
2582 able to unwind the stack. */
2583 SET_HARD_REG_BIT (mask, BR_REG (0));
2585 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2586 if (current_frame_info.r[reg_save_b0] == 0)
2588 extra_spill_size += 8;
2589 n_spilled += 1;
2592 /* Similarly for ar.pfs. */
2593 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2594 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2595 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2597 extra_spill_size += 8;
2598 n_spilled += 1;
2601 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2602 registers are clobbered, so we fall back to the stack. */
2603 current_frame_info.r[reg_save_gp]
2604 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2605 if (current_frame_info.r[reg_save_gp] == 0)
2607 SET_HARD_REG_BIT (mask, GR_REG (1));
2608 spill_size += 8;
2609 n_spilled += 1;
2612 else
2614 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2616 SET_HARD_REG_BIT (mask, BR_REG (0));
2617 extra_spill_size += 8;
2618 n_spilled += 1;
2621 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2623 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2624 current_frame_info.r[reg_save_ar_pfs]
2625 = find_gr_spill (reg_save_ar_pfs, 1);
2626 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2628 extra_spill_size += 8;
2629 n_spilled += 1;
2634 /* Unwind descriptor hackery: things are most efficient if we allocate
2635 consecutive GR save registers for RP, PFS, FP in that order. However,
2636 it is absolutely critical that FP get the only hard register that's
2637 guaranteed to be free, so we allocated it first. If all three did
2638 happen to be allocated hard regs, and are consecutive, rearrange them
2639 into the preferred order now.
2641 If we have already emitted code for any of those registers,
2642 then it's already too late to change. */
2643 min_regno = MIN (current_frame_info.r[reg_fp],
2644 MIN (current_frame_info.r[reg_save_b0],
2645 current_frame_info.r[reg_save_ar_pfs]));
2646 max_regno = MAX (current_frame_info.r[reg_fp],
2647 MAX (current_frame_info.r[reg_save_b0],
2648 current_frame_info.r[reg_save_ar_pfs]));
2649 if (min_regno > 0
2650 && min_regno + 2 == max_regno
2651 && (current_frame_info.r[reg_fp] == min_regno + 1
2652 || current_frame_info.r[reg_save_b0] == min_regno + 1
2653 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2654 && (emitted_frame_related_regs[reg_save_b0] == 0
2655 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2656 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2657 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2658 && (emitted_frame_related_regs[reg_fp] == 0
2659 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2661 current_frame_info.r[reg_save_b0] = min_regno;
2662 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2663 current_frame_info.r[reg_fp] = min_regno + 2;
2666 /* See if we need to store the predicate register block. */
2667 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2668 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2669 break;
2670 if (regno <= PR_REG (63))
2672 SET_HARD_REG_BIT (mask, PR_REG (0));
2673 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2674 if (current_frame_info.r[reg_save_pr] == 0)
2676 extra_spill_size += 8;
2677 n_spilled += 1;
2680 /* ??? Mark them all as used so that register renaming and such
2681 are free to use them. */
2682 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2683 df_set_regs_ever_live (regno, true);
2686 /* If we're forced to use st8.spill, we're forced to save and restore
2687 ar.unat as well. The check for existing liveness allows inline asm
2688 to touch ar.unat. */
2689 if (spilled_gr_p || cfun->machine->n_varargs
2690 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2692 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2693 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2694 current_frame_info.r[reg_save_ar_unat]
2695 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2696 if (current_frame_info.r[reg_save_ar_unat] == 0)
2698 extra_spill_size += 8;
2699 n_spilled += 1;
2703 if (df_regs_ever_live_p (AR_LC_REGNUM))
2705 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2706 current_frame_info.r[reg_save_ar_lc]
2707 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2708 if (current_frame_info.r[reg_save_ar_lc] == 0)
2710 extra_spill_size += 8;
2711 n_spilled += 1;
2715 /* If we have an odd number of words of pretend arguments written to
2716 the stack, then the FR save area will be unaligned. We round the
2717 size of this area up to keep things 16 byte aligned. */
2718 if (spilled_fr_p)
2719 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2720 else
2721 pretend_args_size = crtl->args.pretend_args_size;
2723 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2724 + crtl->outgoing_args_size);
2725 total_size = IA64_STACK_ALIGN (total_size);
2727 /* We always use the 16-byte scratch area provided by the caller, but
2728 if we are a leaf function, there's no one to which we need to provide
2729 a scratch area. */
2730 if (current_function_is_leaf)
2731 total_size = MAX (0, total_size - 16);
2733 current_frame_info.total_size = total_size;
2734 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2735 current_frame_info.spill_size = spill_size;
2736 current_frame_info.extra_spill_size = extra_spill_size;
2737 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2738 current_frame_info.n_spilled = n_spilled;
2739 current_frame_info.initialized = reload_completed;
2742 /* Worker function for TARGET_CAN_ELIMINATE. */
2744 bool
2745 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2747 return (to == BR_REG (0) ? current_function_is_leaf : true);
2750 /* Compute the initial difference between the specified pair of registers. */
2752 HOST_WIDE_INT
2753 ia64_initial_elimination_offset (int from, int to)
2755 HOST_WIDE_INT offset;
2757 ia64_compute_frame_size (get_frame_size ());
2758 switch (from)
2760 case FRAME_POINTER_REGNUM:
2761 switch (to)
2763 case HARD_FRAME_POINTER_REGNUM:
2764 if (current_function_is_leaf)
2765 offset = -current_frame_info.total_size;
2766 else
2767 offset = -(current_frame_info.total_size
2768 - crtl->outgoing_args_size - 16);
2769 break;
2771 case STACK_POINTER_REGNUM:
2772 if (current_function_is_leaf)
2773 offset = 0;
2774 else
2775 offset = 16 + crtl->outgoing_args_size;
2776 break;
2778 default:
2779 gcc_unreachable ();
2781 break;
2783 case ARG_POINTER_REGNUM:
2784 /* Arguments start above the 16 byte save area, unless this is a stdarg
2785 function, in which case we store through the 16 byte save area. */
2786 switch (to)
2788 case HARD_FRAME_POINTER_REGNUM:
2789 offset = 16 - crtl->args.pretend_args_size;
2790 break;
2792 case STACK_POINTER_REGNUM:
2793 offset = (current_frame_info.total_size
2794 + 16 - crtl->args.pretend_args_size);
2795 break;
2797 default:
2798 gcc_unreachable ();
2800 break;
2802 default:
2803 gcc_unreachable ();
2806 return offset;
2809 /* If there are more than a trivial number of register spills, we use
2810 two interleaved iterators so that we can get two memory references
2811 per insn group.
2813 In order to simplify things in the prologue and epilogue expanders,
2814 we use helper functions to fix up the memory references after the
2815 fact with the appropriate offsets to a POST_MODIFY memory mode.
2816 The following data structure tracks the state of the two iterators
2817 while insns are being emitted. */
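/* With two iterators the emitted saves alternate between them, roughly
   (a sketch for 8-byte GR saves):

     st8.spill [iter0] = reg1, 16
     st8.spill [iter1] = reg2, 16
     st8.spill [iter0] = reg3, 16
     ...

   each POST_MODIFY stepping an iterator over the slot owned by the other
   one, so that two memory references can issue per insn group.  */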
2819 struct spill_fill_data
2821 rtx init_after; /* point at which to emit initializations */
2822 rtx init_reg[2]; /* initial base register */
2823 rtx iter_reg[2]; /* the iterator registers */
2824 rtx *prev_addr[2]; /* address of last memory use */
2825 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2826 HOST_WIDE_INT prev_off[2]; /* last offset */
2827 int n_iter; /* number of iterators in use */
2828 int next_iter; /* next iterator to use */
2829 unsigned int save_gr_used_mask;
2832 static struct spill_fill_data spill_fill_data;
2834 static void
2835 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2837 int i;
2839 spill_fill_data.init_after = get_last_insn ();
2840 spill_fill_data.init_reg[0] = init_reg;
2841 spill_fill_data.init_reg[1] = init_reg;
2842 spill_fill_data.prev_addr[0] = NULL;
2843 spill_fill_data.prev_addr[1] = NULL;
2844 spill_fill_data.prev_insn[0] = NULL;
2845 spill_fill_data.prev_insn[1] = NULL;
2846 spill_fill_data.prev_off[0] = cfa_off;
2847 spill_fill_data.prev_off[1] = cfa_off;
2848 spill_fill_data.next_iter = 0;
2849 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2851 spill_fill_data.n_iter = 1 + (n_spills > 2);
2852 for (i = 0; i < spill_fill_data.n_iter; ++i)
2854 int regno = next_scratch_gr_reg ();
2855 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2856 current_frame_info.gr_used_mask |= 1 << regno;
2860 static void
2861 finish_spill_pointers (void)
2863 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2866 static rtx
2867 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2869 int iter = spill_fill_data.next_iter;
2870 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2871 rtx disp_rtx = GEN_INT (disp);
2872 rtx mem;
2874 if (spill_fill_data.prev_addr[iter])
2876 if (satisfies_constraint_N (disp_rtx))
2878 *spill_fill_data.prev_addr[iter]
2879 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2880 gen_rtx_PLUS (DImode,
2881 spill_fill_data.iter_reg[iter],
2882 disp_rtx));
2883 add_reg_note (spill_fill_data.prev_insn[iter],
2884 REG_INC, spill_fill_data.iter_reg[iter]);
2886 else
2888 /* ??? Could use register post_modify for loads. */
2889 if (!satisfies_constraint_I (disp_rtx))
2891 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2892 emit_move_insn (tmp, disp_rtx);
2893 disp_rtx = tmp;
2895 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2896 spill_fill_data.iter_reg[iter], disp_rtx));
2899 /* Micro-optimization: if we've created a frame pointer, it's at
2900 CFA 0, which may allow the real iterator to be initialized lower,
2901 slightly increasing parallelism. Also, if there are few saves
2902 it may eliminate the iterator entirely. */
2903 else if (disp == 0
2904 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905 && frame_pointer_needed)
2907 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2908 set_mem_alias_set (mem, get_varargs_alias_set ());
2909 return mem;
2911 else
2913 rtx seq, insn;
2915 if (disp == 0)
2916 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917 spill_fill_data.init_reg[iter]);
2918 else
2920 start_sequence ();
2922 if (!satisfies_constraint_I (disp_rtx))
2924 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925 emit_move_insn (tmp, disp_rtx);
2926 disp_rtx = tmp;
2929 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930 spill_fill_data.init_reg[iter],
2931 disp_rtx));
2933 seq = get_insns ();
2934 end_sequence ();
2937 /* Careful for being the first insn in a sequence. */
2938 if (spill_fill_data.init_after)
2939 insn = emit_insn_after (seq, spill_fill_data.init_after);
2940 else
2942 rtx first = get_insns ();
2943 if (first)
2944 insn = emit_insn_before (seq, first);
2945 else
2946 insn = emit_insn (seq);
2948 spill_fill_data.init_after = insn;
2951 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2953 /* ??? Not all of the spills are for varargs, but some of them are.
2954 The rest of the spills belong in an alias set of their own. But
2955 it doesn't actually hurt to include them here. */
2956 set_mem_alias_set (mem, get_varargs_alias_set ());
2958 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959 spill_fill_data.prev_off[iter] = cfa_off;
2961 if (++iter >= spill_fill_data.n_iter)
2962 iter = 0;
2963 spill_fill_data.next_iter = iter;
2965 return mem;
2968 static void
2969 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2970 rtx frame_reg)
2972 int iter = spill_fill_data.next_iter;
2973 rtx mem, insn;
2975 mem = spill_restore_mem (reg, cfa_off);
2976 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2977 spill_fill_data.prev_insn[iter] = insn;
2979 if (frame_reg)
2981 rtx base;
2982 HOST_WIDE_INT off;
2984 RTX_FRAME_RELATED_P (insn) = 1;
2986 /* Don't even pretend that the unwind code can intuit its way
2987 through a pair of interleaved post_modify iterators. Just
2988 provide the correct answer. */
2990 if (frame_pointer_needed)
2992 base = hard_frame_pointer_rtx;
2993 off = - cfa_off;
2995 else
2997 base = stack_pointer_rtx;
2998 off = current_frame_info.total_size - cfa_off;
3001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002 gen_rtx_SET (VOIDmode,
3003 gen_rtx_MEM (GET_MODE (reg),
3004 plus_constant (base, off)),
3005 frame_reg));
3009 static void
3010 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3012 int iter = spill_fill_data.next_iter;
3013 rtx insn;
3015 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016 GEN_INT (cfa_off)));
3017 spill_fill_data.prev_insn[iter] = insn;
3020 /* Wrapper functions that discard the CONST_INT spill offset. These
3021 exist so that we can give gr_spill/gr_fill the offset they need and
3022 use a consistent function interface. */
3024 static rtx
3025 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3027 return gen_movdi (dest, src);
3030 static rtx
3031 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3033 return gen_fr_spill (dest, src);
3036 static rtx
3037 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3039 return gen_fr_restore (dest, src);
3042 /* Called after register allocation to add any instructions needed for the
3043 prologue. Using a prologue insn is favored over putting all of the
3044 instructions in output_function_prologue(), since it allows the scheduler
3045 to intermix instructions with the saves of the caller saved registers. In
3046 some cases, it might be necessary to emit a barrier instruction as the last
3047 insn to prevent such scheduling.
3049 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050 so that the debug info generation code can handle them properly.
3052 The register save area is laid out like so:
3053 cfa+16
3054 [ varargs spill area ]
3055 [ fr register spill area ]
3056 [ br register spill area ]
3057 [ ar register spill area ]
3058 [ pr register spill area ]
3059 [ gr register spill area ] */
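/* A sketch of how the expanders walk this area: cfa_off starts at
   spill_cfa_off + spill_size + extra_spill_size, each do_spill/do_restore
   consumes 8 bytes (16 for an FR register), and the gcc_asserts in
   ia64_expand_prologue/ia64_expand_epilogue check that the walk ends
   exactly at spill_cfa_off.  */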
3061 /* ??? Get inefficient code when the frame size is larger than can fit in an
3062 adds instruction. */
3064 void
3065 ia64_expand_prologue (void)
3067 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3069 rtx reg, alt_reg;
3071 ia64_compute_frame_size (get_frame_size ());
3072 last_scratch_gr_reg = 15;
3074 if (dump_file)
3076 fprintf (dump_file, "ia64 frame related registers "
3077 "recorded in current_frame_info.r[]:\n");
3078 #define PRINTREG(a) if (current_frame_info.r[a]) \
3079 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3080 PRINTREG(reg_fp);
3081 PRINTREG(reg_save_b0);
3082 PRINTREG(reg_save_pr);
3083 PRINTREG(reg_save_ar_pfs);
3084 PRINTREG(reg_save_ar_unat);
3085 PRINTREG(reg_save_ar_lc);
3086 PRINTREG(reg_save_gp);
3087 #undef PRINTREG
3090 /* If there is no epilogue, then we don't need some prologue insns.
3091 We need to avoid emitting the dead prologue insns, because flow
3092 will complain about them. */
3093 if (optimize)
3095 edge e;
3096 edge_iterator ei;
3098 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3099 if ((e->flags & EDGE_FAKE) == 0
3100 && (e->flags & EDGE_FALLTHRU) != 0)
3101 break;
3102 epilogue_p = (e != NULL);
3104 else
3105 epilogue_p = 1;
3107 /* Set the local, input, and output register names. We need to do this
3108 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109 half. If we use in/loc/out register names, then we get assembler errors
3110 in crtn.S because there is no alloc insn or regstk directive in there. */
3111 if (! TARGET_REG_NAMES)
3113 int inputs = current_frame_info.n_input_regs;
3114 int locals = current_frame_info.n_local_regs;
3115 int outputs = current_frame_info.n_output_regs;
3117 for (i = 0; i < inputs; i++)
3118 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119 for (i = 0; i < locals; i++)
3120 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121 for (i = 0; i < outputs; i++)
3122 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3125 /* Set the frame pointer register name. The regnum is logically loc79,
3126 but of course we'll not have allocated that many locals. Rather than
3127 worrying about renumbering the existing rtxs, we adjust the name. */
3128 /* ??? This code means that we can never use one local register when
3129 there is a frame pointer. loc79 gets wasted in this case, as it is
3130 renamed to a register that will never be used. See also the try_locals
3131 code in find_gr_spill. */
3132 if (current_frame_info.r[reg_fp])
3134 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3135 reg_names[HARD_FRAME_POINTER_REGNUM]
3136 = reg_names[current_frame_info.r[reg_fp]];
3137 reg_names[current_frame_info.r[reg_fp]] = tmp;
3140 /* We don't need an alloc instruction if we've used no outputs or locals. */
3141 if (current_frame_info.n_local_regs == 0
3142 && current_frame_info.n_output_regs == 0
3143 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3144 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3146 /* If there is no alloc, but there are input registers used, then we
3147 need a .regstk directive. */
3148 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3149 ar_pfs_save_reg = NULL_RTX;
3151 else
3153 current_frame_info.need_regstk = 0;
3155 if (current_frame_info.r[reg_save_ar_pfs])
3157 regno = current_frame_info.r[reg_save_ar_pfs];
3158 reg_emitted (reg_save_ar_pfs);
3160 else
3161 regno = next_scratch_gr_reg ();
3162 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3164 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3165 GEN_INT (current_frame_info.n_input_regs),
3166 GEN_INT (current_frame_info.n_local_regs),
3167 GEN_INT (current_frame_info.n_output_regs),
3168 GEN_INT (current_frame_info.n_rotate_regs)));
3169 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
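      /* The alloc emitted above shows up in the assembly roughly as

           alloc rN = ar.pfs, <inputs>, <locals>, <outputs>, <rotating>

         where rN stands for the ar.pfs save register chosen above (the
         register number here is purely illustrative).  */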
3172 /* Set up frame pointer, stack pointer, and spill iterators. */
3174 n_varargs = cfun->machine->n_varargs;
3175 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3176 stack_pointer_rtx, 0);
3178 if (frame_pointer_needed)
3180 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3181 RTX_FRAME_RELATED_P (insn) = 1;
3184 if (current_frame_info.total_size != 0)
3186 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3187 rtx offset;
3189 if (satisfies_constraint_I (frame_size_rtx))
3190 offset = frame_size_rtx;
3191 else
3193 regno = next_scratch_gr_reg ();
3194 offset = gen_rtx_REG (DImode, regno);
3195 emit_move_insn (offset, frame_size_rtx);
3198 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3199 stack_pointer_rtx, offset));
3201 if (! frame_pointer_needed)
3203 RTX_FRAME_RELATED_P (insn) = 1;
3204 if (GET_CODE (offset) != CONST_INT)
3205 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3206 gen_rtx_SET (VOIDmode,
3207 stack_pointer_rtx,
3208 gen_rtx_PLUS (DImode,
3209 stack_pointer_rtx,
3210 frame_size_rtx)));
3213 /* ??? At this point we must generate a magic insn that appears to
3214 modify the stack pointer, the frame pointer, and all spill
3215 iterators. This would allow the most scheduling freedom. For
3216 now, just hard stop. */
3217 emit_insn (gen_blockage ());
3220 /* Must copy out ar.unat before doing any integer spills. */
3221 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3223 if (current_frame_info.r[reg_save_ar_unat])
3225 ar_unat_save_reg
3226 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3227 reg_emitted (reg_save_ar_unat);
3229 else
3231 alt_regno = next_scratch_gr_reg ();
3232 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3233 current_frame_info.gr_used_mask |= 1 << alt_regno;
3236 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3237 insn = emit_move_insn (ar_unat_save_reg, reg);
3238 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3240 /* Even if we're not going to generate an epilogue, we still
3241 need to save the register so that EH works. */
3242 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3243 emit_insn (gen_prologue_use (ar_unat_save_reg));
3245 else
3246 ar_unat_save_reg = NULL_RTX;
3248 /* Spill all varargs registers. Do this before spilling any GR registers,
3249 since we want the UNAT bits for the GR registers to override the UNAT
3250 bits from varargs, which we don't care about. */
3252 cfa_off = -16;
3253 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3255 reg = gen_rtx_REG (DImode, regno);
3256 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3259 /* Locate the bottom of the register save area. */
3260 cfa_off = (current_frame_info.spill_cfa_off
3261 + current_frame_info.spill_size
3262 + current_frame_info.extra_spill_size);
3264 /* Save the predicate register block either in a register or in memory. */
3265 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3267 reg = gen_rtx_REG (DImode, PR_REG (0));
3268 if (current_frame_info.r[reg_save_pr] != 0)
3270 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3271 reg_emitted (reg_save_pr);
3272 insn = emit_move_insn (alt_reg, reg);
3274 /* ??? Denote pr spill/fill by a DImode move that modifies all
3275 64 hard registers. */
3276 RTX_FRAME_RELATED_P (insn) = 1;
3277 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3278 gen_rtx_SET (VOIDmode, alt_reg, reg));
3280 /* Even if we're not going to generate an epilogue, we still
3281 need to save the register so that EH works. */
3282 if (! epilogue_p)
3283 emit_insn (gen_prologue_use (alt_reg));
3285 else
3287 alt_regno = next_scratch_gr_reg ();
3288 alt_reg = gen_rtx_REG (DImode, alt_regno);
3289 insn = emit_move_insn (alt_reg, reg);
3290 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3291 cfa_off -= 8;
3295 /* Handle AR regs in numerical order. All of them get special handling. */
3296 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3297 && current_frame_info.r[reg_save_ar_unat] == 0)
3299 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3300 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3301 cfa_off -= 8;
3304 /* The alloc insn already copied ar.pfs into a general register. The
3305 only thing we have to do now is copy that register to a stack slot
3306 if we'd not allocated a local register for the job. */
3307 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3308 && current_frame_info.r[reg_save_ar_pfs] == 0)
3310 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3311 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3312 cfa_off -= 8;
3315 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3317 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3318 if (current_frame_info.r[reg_save_ar_lc] != 0)
3320 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3321 reg_emitted (reg_save_ar_lc);
3322 insn = emit_move_insn (alt_reg, reg);
3323 RTX_FRAME_RELATED_P (insn) = 1;
3325 /* Even if we're not going to generate an epilogue, we still
3326 need to save the register so that EH works. */
3327 if (! epilogue_p)
3328 emit_insn (gen_prologue_use (alt_reg));
3330 else
3332 alt_regno = next_scratch_gr_reg ();
3333 alt_reg = gen_rtx_REG (DImode, alt_regno);
3334 emit_move_insn (alt_reg, reg);
3335 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3336 cfa_off -= 8;
3340 /* Save the return pointer. */
3341 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3343 reg = gen_rtx_REG (DImode, BR_REG (0));
3344 if (current_frame_info.r[reg_save_b0] != 0)
3346 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3347 reg_emitted (reg_save_b0);
3348 insn = emit_move_insn (alt_reg, reg);
3349 RTX_FRAME_RELATED_P (insn) = 1;
3351 /* Even if we're not going to generate an epilogue, we still
3352 need to save the register so that EH works. */
3353 if (! epilogue_p)
3354 emit_insn (gen_prologue_use (alt_reg));
3356 else
3358 alt_regno = next_scratch_gr_reg ();
3359 alt_reg = gen_rtx_REG (DImode, alt_regno);
3360 emit_move_insn (alt_reg, reg);
3361 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3362 cfa_off -= 8;
3366 if (current_frame_info.r[reg_save_gp])
3368 reg_emitted (reg_save_gp);
3369 insn = emit_move_insn (gen_rtx_REG (DImode,
3370 current_frame_info.r[reg_save_gp]),
3371 pic_offset_table_rtx);
3374 /* We should now be at the base of the gr/br/fr spill area. */
3375 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376 + current_frame_info.spill_size));
3378 /* Spill all general registers. */
3379 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3382 reg = gen_rtx_REG (DImode, regno);
3383 do_spill (gen_gr_spill, reg, cfa_off, reg);
3384 cfa_off -= 8;
3387 /* Spill the rest of the BR registers. */
3388 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3389 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3391 alt_regno = next_scratch_gr_reg ();
3392 alt_reg = gen_rtx_REG (DImode, alt_regno);
3393 reg = gen_rtx_REG (DImode, regno);
3394 emit_move_insn (alt_reg, reg);
3395 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3396 cfa_off -= 8;
3399 /* Align the frame and spill all FR registers. */
3400 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3401 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3403 gcc_assert (!(cfa_off & 15));
3404 reg = gen_rtx_REG (XFmode, regno);
3405 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3406 cfa_off -= 16;
3409 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3411 finish_spill_pointers ();
3414 /* Called after register allocation to add any instructions needed for the
3415 epilogue. Using an epilogue insn is favored over putting all of the
3416 instructions in output_function_epilogue(), since it allows the scheduler
3417 to intermix instructions with the restores of the caller saved registers. In
3418 some cases, it might be necessary to emit a barrier instruction as the last
3419 insn to prevent such scheduling. */
3421 void
3422 ia64_expand_epilogue (int sibcall_p)
3424 rtx insn, reg, alt_reg, ar_unat_save_reg;
3425 int regno, alt_regno, cfa_off;
3427 ia64_compute_frame_size (get_frame_size ());
3429 /* If there is a frame pointer, then we use it instead of the stack
3430 pointer, so that the stack pointer does not need to be valid when
3431 the epilogue starts. See EXIT_IGNORE_STACK. */
3432 if (frame_pointer_needed)
3433 setup_spill_pointers (current_frame_info.n_spilled,
3434 hard_frame_pointer_rtx, 0);
3435 else
3436 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3437 current_frame_info.total_size);
3439 if (current_frame_info.total_size != 0)
3441 /* ??? At this point we must generate a magic insn that appears to
3442 modify the spill iterators and the frame pointer. This would
3443 allow the most scheduling freedom. For now, just hard stop. */
3444 emit_insn (gen_blockage ());
3447 /* Locate the bottom of the register save area. */
3448 cfa_off = (current_frame_info.spill_cfa_off
3449 + current_frame_info.spill_size
3450 + current_frame_info.extra_spill_size);
3452 /* Restore the predicate registers. */
3453 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3455 if (current_frame_info.r[reg_save_pr] != 0)
3457 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3458 reg_emitted (reg_save_pr);
3460 else
3462 alt_regno = next_scratch_gr_reg ();
3463 alt_reg = gen_rtx_REG (DImode, alt_regno);
3464 do_restore (gen_movdi_x, alt_reg, cfa_off);
3465 cfa_off -= 8;
3467 reg = gen_rtx_REG (DImode, PR_REG (0));
3468 emit_move_insn (reg, alt_reg);
3471 /* Restore the application registers. */
3473 /* Load the saved unat from the stack, but do not restore it until
3474 after the GRs have been restored. */
3475 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3477 if (current_frame_info.r[reg_save_ar_unat] != 0)
3479 ar_unat_save_reg
3480 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3481 reg_emitted (reg_save_ar_unat);
3483 else
3485 alt_regno = next_scratch_gr_reg ();
3486 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3487 current_frame_info.gr_used_mask |= 1 << alt_regno;
3488 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3489 cfa_off -= 8;
3492 else
3493 ar_unat_save_reg = NULL_RTX;
3495 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3497 reg_emitted (reg_save_ar_pfs);
3498 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3499 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3500 emit_move_insn (reg, alt_reg);
3502 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3504 alt_regno = next_scratch_gr_reg ();
3505 alt_reg = gen_rtx_REG (DImode, alt_regno);
3506 do_restore (gen_movdi_x, alt_reg, cfa_off);
3507 cfa_off -= 8;
3508 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3509 emit_move_insn (reg, alt_reg);
3512 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3514 if (current_frame_info.r[reg_save_ar_lc] != 0)
3516 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3517 reg_emitted (reg_save_ar_lc);
3519 else
3521 alt_regno = next_scratch_gr_reg ();
3522 alt_reg = gen_rtx_REG (DImode, alt_regno);
3523 do_restore (gen_movdi_x, alt_reg, cfa_off);
3524 cfa_off -= 8;
3526 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3527 emit_move_insn (reg, alt_reg);
3530 /* Restore the return pointer. */
3531 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3533 if (current_frame_info.r[reg_save_b0] != 0)
3535 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3536 reg_emitted (reg_save_b0);
3538 else
3540 alt_regno = next_scratch_gr_reg ();
3541 alt_reg = gen_rtx_REG (DImode, alt_regno);
3542 do_restore (gen_movdi_x, alt_reg, cfa_off);
3543 cfa_off -= 8;
3545 reg = gen_rtx_REG (DImode, BR_REG (0));
3546 emit_move_insn (reg, alt_reg);
3549 /* We should now be at the base of the gr/br/fr spill area. */
3550 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3551 + current_frame_info.spill_size));
3553 /* The GP may be stored on the stack in the prologue, but it's
3554 never restored in the epilogue. Skip the stack slot. */
3555 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3556 cfa_off -= 8;
3558 /* Restore all general registers. */
3559 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3560 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3562 reg = gen_rtx_REG (DImode, regno);
3563 do_restore (gen_gr_restore, reg, cfa_off);
3564 cfa_off -= 8;
3567 /* Restore the branch registers. */
3568 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3569 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3571 alt_regno = next_scratch_gr_reg ();
3572 alt_reg = gen_rtx_REG (DImode, alt_regno);
3573 do_restore (gen_movdi_x, alt_reg, cfa_off);
3574 cfa_off -= 8;
3575 reg = gen_rtx_REG (DImode, regno);
3576 emit_move_insn (reg, alt_reg);
3579 /* Restore floating point registers. */
3580 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3581 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3583 gcc_assert (!(cfa_off & 15));
3584 reg = gen_rtx_REG (XFmode, regno);
3585 do_restore (gen_fr_restore_x, reg, cfa_off);
3586 cfa_off -= 16;
3589 /* Restore ar.unat for real. */
3590 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3592 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3593 emit_move_insn (reg, ar_unat_save_reg);
3596 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3598 finish_spill_pointers ();
3600 if (current_frame_info.total_size
3601 || cfun->machine->ia64_eh_epilogue_sp
3602 || frame_pointer_needed)
3604 /* ??? At this point we must generate a magic insn that appears to
3605 modify the spill iterators, the stack pointer, and the frame
3606 pointer. This would allow the most scheduling freedom. For now,
3607 just hard stop. */
3608 emit_insn (gen_blockage ());
3611 if (cfun->machine->ia64_eh_epilogue_sp)
3612 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3613 else if (frame_pointer_needed)
3615 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3616 RTX_FRAME_RELATED_P (insn) = 1;
3618 else if (current_frame_info.total_size)
3620 rtx offset, frame_size_rtx;
3622 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3623 if (satisfies_constraint_I (frame_size_rtx))
3624 offset = frame_size_rtx;
3625 else
3627 regno = next_scratch_gr_reg ();
3628 offset = gen_rtx_REG (DImode, regno);
3629 emit_move_insn (offset, frame_size_rtx);
3632 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3633 offset));
3635 RTX_FRAME_RELATED_P (insn) = 1;
3636 if (GET_CODE (offset) != CONST_INT)
3637 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3638 gen_rtx_SET (VOIDmode,
3639 stack_pointer_rtx,
3640 gen_rtx_PLUS (DImode,
3641 stack_pointer_rtx,
3642 frame_size_rtx)));
3645 if (cfun->machine->ia64_eh_epilogue_bsp)
3646 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3648 if (! sibcall_p)
3649 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3650 else
3652 int fp = GR_REG (2);
3653 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3654 first available call clobbered register. If there was a frame_pointer
3655 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3656 so we have to make sure we're using the string "r2" when emitting
3657 the register name for the assembler. */
3658 if (current_frame_info.r[reg_fp]
3659 && current_frame_info.r[reg_fp] == GR_REG (2))
3660 fp = HARD_FRAME_POINTER_REGNUM;
3662 /* We must emit an alloc to force the input registers to become output
3663 registers. Otherwise, if the callee tries to pass its parameters
3664 through to another call without an intervening alloc, then these
3665 values get lost. */
3666 /* ??? We don't need to preserve all input registers. We only need to
3667 preserve those input registers used as arguments to the sibling call.
3668 It is unclear how to compute that number here. */
3669 if (current_frame_info.n_input_regs != 0)
3671 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3672 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3673 const0_rtx, const0_rtx,
3674 n_inputs, const0_rtx));
3675 RTX_FRAME_RELATED_P (insn) = 1;
3680 /* Return 1 if br.ret can do all the work required to return from a
3681 function. */
3683 int
3684 ia64_direct_return (void)
3686 if (reload_completed && ! frame_pointer_needed)
3688 ia64_compute_frame_size (get_frame_size ());
3690 return (current_frame_info.total_size == 0
3691 && current_frame_info.n_spilled == 0
3692 && current_frame_info.r[reg_save_b0] == 0
3693 && current_frame_info.r[reg_save_pr] == 0
3694 && current_frame_info.r[reg_save_ar_pfs] == 0
3695 && current_frame_info.r[reg_save_ar_unat] == 0
3696 && current_frame_info.r[reg_save_ar_lc] == 0);
3698 return 0;
3701 /* Return the magic cookie that we use to hold the return address
3702 during early compilation. */
3705 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3707 if (count != 0)
3708 return NULL;
3709 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3712 /* Split this value after reload, now that we know where the return
3713 address is saved. */
3715 void
3716 ia64_split_return_addr_rtx (rtx dest)
3718 rtx src;
3720 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3722 if (current_frame_info.r[reg_save_b0] != 0)
3724 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3725 reg_emitted (reg_save_b0);
3727 else
3729 HOST_WIDE_INT off;
3730 unsigned int regno;
3731 rtx off_r;
3733 /* Compute offset from CFA for BR0. */
3734 /* ??? Must be kept in sync with ia64_expand_prologue. */
3735 off = (current_frame_info.spill_cfa_off
3736 + current_frame_info.spill_size);
3737 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3738 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3739 off -= 8;
3741 /* Convert CFA offset to a register based offset. */
3742 if (frame_pointer_needed)
3743 src = hard_frame_pointer_rtx;
3744 else
3746 src = stack_pointer_rtx;
3747 off += current_frame_info.total_size;
3750 /* Load address into scratch register. */
3751 off_r = GEN_INT (off);
3752 if (satisfies_constraint_I (off_r))
3753 emit_insn (gen_adddi3 (dest, src, off_r));
3754 else
3756 emit_move_insn (dest, off_r);
3757 emit_insn (gen_adddi3 (dest, src, dest));
3760 src = gen_rtx_MEM (Pmode, dest);
3763 else
3764 src = gen_rtx_REG (DImode, BR_REG (0));
3766 emit_move_insn (dest, src);
3770 ia64_hard_regno_rename_ok (int from, int to)
3772 /* Don't clobber any of the registers we reserved for the prologue. */
3773 unsigned int r;
3775 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3776 if (to == current_frame_info.r[r]
3777 || from == current_frame_info.r[r]
3778 || to == emitted_frame_related_regs[r]
3779 || from == emitted_frame_related_regs[r])
3780 return 0;
3782 /* Don't use output registers outside the register frame. */
3783 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3784 return 0;
3786 /* Retain even/oddness on predicate register pairs. */
3787 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3788 return (from & 1) == (to & 1);
3790 return 1;
3793 /* Target hook for assembling integer objects. Handle word-sized
3794 aligned objects and detect the cases when @fptr is needed. */
3796 static bool
3797 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3799 if (size == POINTER_SIZE / BITS_PER_UNIT
3800 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3801 && GET_CODE (x) == SYMBOL_REF
3802 && SYMBOL_REF_FUNCTION_P (x))
3804 static const char * const directive[2][2] = {
3805 /* 64-bit pointer */ /* 32-bit pointer */
3806 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3807 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3809 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3810 output_addr_const (asm_out_file, x);
3811 fputs (")\n", asm_out_file);
3812 return true;
3814 return default_assemble_integer (x, size, aligned_p);
3817 /* Emit the function prologue. */
3819 static void
3820 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3822 int mask, grsave, grsave_prev;
3824 if (current_frame_info.need_regstk)
3825 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3826 current_frame_info.n_input_regs,
3827 current_frame_info.n_local_regs,
3828 current_frame_info.n_output_regs,
3829 current_frame_info.n_rotate_regs);
3831 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3832 return;
3834 /* Emit the .prologue directive. */
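/* Roughly, per the ia64 unwind directive conventions: MASK bit 8 marks b0
   (rp), 4 marks ar.pfs, 2 marks the frame pointer and 1 marks ar.pr, each
   saved in consecutive general registers starting at GRSAVE.  The
   two-operand form is only emitted for the GNU assembler (see the
   TARGET_GNU_AS test below).  */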
3836 mask = 0;
3837 grsave = grsave_prev = 0;
3838 if (current_frame_info.r[reg_save_b0] != 0)
3840 mask |= 8;
3841 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3843 if (current_frame_info.r[reg_save_ar_pfs] != 0
3844 && (grsave_prev == 0
3845 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3847 mask |= 4;
3848 if (grsave_prev == 0)
3849 grsave = current_frame_info.r[reg_save_ar_pfs];
3850 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3852 if (current_frame_info.r[reg_fp] != 0
3853 && (grsave_prev == 0
3854 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3856 mask |= 2;
3857 if (grsave_prev == 0)
3858 grsave = HARD_FRAME_POINTER_REGNUM;
3859 grsave_prev = current_frame_info.r[reg_fp];
3861 if (current_frame_info.r[reg_save_pr] != 0
3862 && (grsave_prev == 0
3863 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3865 mask |= 1;
3866 if (grsave_prev == 0)
3867 grsave = current_frame_info.r[reg_save_pr];
3870 if (mask && TARGET_GNU_AS)
3871 fprintf (file, "\t.prologue %d, %d\n", mask,
3872 ia64_dbx_register_number (grsave));
3873 else
3874 fputs ("\t.prologue\n", file);
3876 /* Emit a .spill directive, if necessary, to relocate the base of
3877 the register spill area. */
3878 if (current_frame_info.spill_cfa_off != -16)
3879 fprintf (file, "\t.spill %ld\n",
3880 (long) (current_frame_info.spill_cfa_off
3881 + current_frame_info.spill_size));
3884 /* Emit the .body directive at the scheduled end of the prologue. */
3886 static void
3887 ia64_output_function_end_prologue (FILE *file)
3889 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3890 return;
3892 fputs ("\t.body\n", file);
3895 /* Emit the function epilogue. */
3897 static void
3898 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3899 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3901 int i;
3903 if (current_frame_info.r[reg_fp])
3905 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3906 reg_names[HARD_FRAME_POINTER_REGNUM]
3907 = reg_names[current_frame_info.r[reg_fp]];
3908 reg_names[current_frame_info.r[reg_fp]] = tmp;
3909 reg_emitted (reg_fp);
3911 if (! TARGET_REG_NAMES)
3913 for (i = 0; i < current_frame_info.n_input_regs; i++)
3914 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3915 for (i = 0; i < current_frame_info.n_local_regs; i++)
3916 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3917 for (i = 0; i < current_frame_info.n_output_regs; i++)
3918 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3921 current_frame_info.initialized = 0;
3925 ia64_dbx_register_number (int regno)
3927 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3928 from its home at loc79 to something inside the register frame. We
3929 must perform the same renumbering here for the debug info. */
3930 if (current_frame_info.r[reg_fp])
3932 if (regno == HARD_FRAME_POINTER_REGNUM)
3933 regno = current_frame_info.r[reg_fp];
3934 else if (regno == current_frame_info.r[reg_fp])
3935 regno = HARD_FRAME_POINTER_REGNUM;
3938 if (IN_REGNO_P (regno))
3939 return 32 + regno - IN_REG (0);
3940 else if (LOC_REGNO_P (regno))
3941 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3942 else if (OUT_REGNO_P (regno))
3943 return (32 + current_frame_info.n_input_regs
3944 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3945 else
3946 return regno;
3949 /* Implement TARGET_TRAMPOLINE_INIT.
3951 The trampoline should set the static chain pointer to the value placed
3952 into the trampoline and should branch to the specified routine.
3953 To make the normal indirect-subroutine calling convention work,
3954 the trampoline must look like a function descriptor; the first
3955 word being the target address and the second being the target's
3956 global pointer.
3958 We abuse the concept of a global pointer by arranging for it
3959 to point to the data we need to load. The complete trampoline
3960 has the following form:
3962 +-------------------+ \
3963 TRAMP: | __ia64_trampoline | |
3964 +-------------------+ > fake function descriptor
3965 | TRAMP+16 | |
3966 +-------------------+ /
3967 | target descriptor |
3968 +-------------------+
3969 | static link |
3970 +-------------------+
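   __ia64_trampoline is presumably the libgcc helper (see lib1funcs.asm)
   that picks up the target descriptor and static link stored at TRAMP+16
   and TRAMP+24 and transfers control to the real target.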
3973 static void
3974 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
3976 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3977 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
3979 /* The Intel assembler requires that the global __ia64_trampoline symbol
3980 be declared explicitly. */
3981 if (!TARGET_GNU_AS)
3983 static bool declared_ia64_trampoline = false;
3985 if (!declared_ia64_trampoline)
3987 declared_ia64_trampoline = true;
3988 (*targetm.asm_out.globalize_label) (asm_out_file,
3989 "__ia64_trampoline");
3993 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3994 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
3995 fnaddr = convert_memory_address (Pmode, fnaddr);
3996 static_chain = convert_memory_address (Pmode, static_chain);
3998 /* Load up our iterator. */
3999 addr_reg = copy_to_reg (addr);
4000 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4002 /* The first two words are the fake descriptor:
4003 __ia64_trampoline, ADDR+16. */
4004 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4005 if (TARGET_ABI_OPEN_VMS)
4007 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4008 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4009 relocation against function symbols to make it identical to the
4010 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4011 strict ELF and dereference to get the bare code address. */
4012 rtx reg = gen_reg_rtx (Pmode);
4013 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4014 emit_move_insn (reg, tramp);
4015 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4016 tramp = reg;
4018 emit_move_insn (m_tramp, tramp);
4019 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4020 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4022 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4023 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4024 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4026 /* The third word is the target descriptor. */
4027 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4028 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4029 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4031 /* The fourth word is the static chain. */
4032 emit_move_insn (m_tramp, static_chain);
4035 /* Do any needed setup for a variadic function. CUM has not been updated
4036 for the last named argument which has type TYPE and mode MODE.
4038 We generate the actual spill instructions during prologue generation. */
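/* Illustrative example: for int f (int a, ...), advancing past the last
   named argument leaves next_cum.words == 1, so n == 7 and *pretend_size
   becomes 7 * UNITS_PER_WORD == 56; the prologue later spills those seven
   unnamed incoming registers.  */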
4040 static void
4041 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4042 tree type, int * pretend_size,
4043 int second_time ATTRIBUTE_UNUSED)
4045 CUMULATIVE_ARGS next_cum = *cum;
4047 /* Skip the current argument. */
4048 ia64_function_arg_advance (&next_cum, mode, type, 1);
4050 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4052 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4053 *pretend_size = n * UNITS_PER_WORD;
4054 cfun->machine->n_varargs = n;
4058 /* Check whether TYPE is a homogeneous floating point aggregate. If
4059 it is, return the mode of the floating point type that appears
4060 in all leafs. If it is not, return VOIDmode.
4062 An aggregate is a homogeneous floating point aggregate if all
4063 fields/elements in it have the same floating point type (e.g.,
4064 SFmode). 128-bit quad-precision floats are excluded.
4066 Variable sized aggregates should never arrive here, since we should
4067 have already decided to pass them by reference. Top-level zero-sized
4068 aggregates are excluded because our parallels crash the middle-end. */
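/* For example, struct { float x, y, z; } is an SFmode HFA, while
   struct { float x; double y; } is not (mixed element types), and a
   struct whose only field has 128-bit quad-precision type (TFmode) is
   not either, since such elements are excluded below.  */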
4070 static enum machine_mode
4071 hfa_element_mode (const_tree type, bool nested)
4073 enum machine_mode element_mode = VOIDmode;
4074 enum machine_mode mode;
4075 enum tree_code code = TREE_CODE (type);
4076 int know_element_mode = 0;
4077 tree t;
4079 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4080 return VOIDmode;
4082 switch (code)
4084 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4085 case BOOLEAN_TYPE: case POINTER_TYPE:
4086 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4087 case LANG_TYPE: case FUNCTION_TYPE:
4088 return VOIDmode;
4090 /* Fortran complex types are supposed to be HFAs, so we need to handle
4091 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4092 types though. */
4093 case COMPLEX_TYPE:
4094 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4095 && TYPE_MODE (type) != TCmode)
4096 return GET_MODE_INNER (TYPE_MODE (type));
4097 else
4098 return VOIDmode;
4100 case REAL_TYPE:
4101 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4102 mode if this is contained within an aggregate. */
4103 if (nested && TYPE_MODE (type) != TFmode)
4104 return TYPE_MODE (type);
4105 else
4106 return VOIDmode;
4108 case ARRAY_TYPE:
4109 return hfa_element_mode (TREE_TYPE (type), 1);
4111 case RECORD_TYPE:
4112 case UNION_TYPE:
4113 case QUAL_UNION_TYPE:
4114 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4116 if (TREE_CODE (t) != FIELD_DECL)
4117 continue;
4119 mode = hfa_element_mode (TREE_TYPE (t), 1);
4120 if (know_element_mode)
4122 if (mode != element_mode)
4123 return VOIDmode;
4125 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4126 return VOIDmode;
4127 else
4129 know_element_mode = 1;
4130 element_mode = mode;
4133 return element_mode;
4135 default:
4136 /* If we reach here, we probably have some front-end specific type
4137 that the backend doesn't know about. This can happen via the
4138 aggregate_value_p call in init_function_start. All we can do is
4139 ignore unknown tree types. */
4140 return VOIDmode;
4143 return VOIDmode;
4146 /* Return the number of words required to hold a quantity of TYPE and MODE
4147 when passed as an argument. */
4148 static int
4149 ia64_function_arg_words (tree type, enum machine_mode mode)
4151 int words;
4153 if (mode == BLKmode)
4154 words = int_size_in_bytes (type);
4155 else
4156 words = GET_MODE_SIZE (mode);
4158 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4161 /* Return the number of registers that should be skipped so the current
4162 argument (described by TYPE and WORDS) will be properly aligned.
4164 Integer and float arguments larger than 8 bytes start at the next
4165 even boundary. Aggregates larger than 8 bytes start at the next
4166 even boundary if the aggregate has 16 byte alignment. Note that
4167 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4168 but are still to be aligned in registers.
4170 ??? The ABI does not specify how to handle aggregates with
4171 alignment from 9 to 15 bytes, or greater than 16. We handle them
4172 all as if they had 16 byte alignment. Such aggregates can occur
4173 only if gcc extensions are used. */
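/* For example (illustrative): a TImode or 16-byte-aligned aggregate
   argument arriving when cum->words is odd skips one slot so that it
   starts on an even slot; a DImode scalar in the same position does not.
   On VMS no slots are ever skipped.  */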
4174 static int
4175 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4177 /* No registers are skipped on VMS. */
4178 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4179 return 0;
4181 if (type
4182 && TREE_CODE (type) != INTEGER_TYPE
4183 && TREE_CODE (type) != REAL_TYPE)
4184 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4185 else
4186 return words > 1;
4189 /* Return rtx for register where argument is passed, or zero if it is passed
4190 on the stack. */
4191 /* ??? 128-bit quad-precision floats are always passed in general
4192 registers. */
4195 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4196 int named, int incoming)
4198 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4199 int words = ia64_function_arg_words (type, mode);
4200 int offset = ia64_function_arg_offset (cum, type, words);
4201 enum machine_mode hfa_mode = VOIDmode;
4203 /* For OpenVMS, emit the instruction setting up the argument register here,
4204 when we know it will be emitted together with the other argument setup
4205 insns. This is not conceptually the best place to do this, but it is
4206 the easiest, as we have convenient access to cumulative args info. */
4208 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4209 && named == 1)
4211 unsigned HOST_WIDE_INT regval = cum->words;
4212 int i;
4214 for (i = 0; i < 8; i++)
4215 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4217 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4218 GEN_INT (regval));
4221 /* If all argument slots are used, then it must go on the stack. */
4222 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4223 return 0;
4225 /* Check for and handle homogeneous FP aggregates. */
4226 if (type)
4227 hfa_mode = hfa_element_mode (type, 0);
4229 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4230 and unprototyped hfas are passed specially. */
4231 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4233 rtx loc[16];
4234 int i = 0;
4235 int fp_regs = cum->fp_regs;
4236 int int_regs = cum->words + offset;
4237 int hfa_size = GET_MODE_SIZE (hfa_mode);
4238 int byte_size;
4239 int args_byte_size;
4241 /* If prototyped, pass it in FR regs then GR regs.
4242 If not prototyped, pass it in both FR and GR regs.
4244 If this is an SFmode aggregate, then it is possible to run out of
4245 FR regs while GR regs are still left. In that case, we pass the
4246 remaining part in the GR regs. */
4248 /* Fill the FP regs. We do this always. We stop if we reach the end
4249 of the argument, the last FP register, or the last argument slot. */
4251 byte_size = ((mode == BLKmode)
4252 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4253 args_byte_size = int_regs * UNITS_PER_WORD;
4254 offset = 0;
4255 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4256 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4258 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4259 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4260 + fp_regs)),
4261 GEN_INT (offset));
4262 offset += hfa_size;
4263 args_byte_size += hfa_size;
4264 fp_regs++;
4267 /* If no prototype, then the whole thing must go in GR regs. */
4268 if (! cum->prototype)
4269 offset = 0;
4270 /* If this is an SFmode aggregate, then we might have some left over
4271 that needs to go in GR regs. */
4272 else if (byte_size != offset)
4273 int_regs += offset / UNITS_PER_WORD;
4275 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4277 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4279 enum machine_mode gr_mode = DImode;
4280 unsigned int gr_size;
4282 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4283 then this goes in a GR reg left adjusted/little endian, right
4284 adjusted/big endian. */
4285 /* ??? Currently this is handled wrong, because 4-byte hunks are
4286 always right adjusted/little endian. */
4287 if (offset & 0x4)
4288 gr_mode = SImode;
4289 /* If we have an even 4 byte hunk because the aggregate is a
4290 multiple of 4 bytes in size, then this goes in a GR reg right
4291 adjusted/little endian. */
4292 else if (byte_size - offset == 4)
4293 gr_mode = SImode;
4295 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4296 gen_rtx_REG (gr_mode, (basereg
4297 + int_regs)),
4298 GEN_INT (offset));
4300 gr_size = GET_MODE_SIZE (gr_mode);
4301 offset += gr_size;
4302 if (gr_size == UNITS_PER_WORD
4303 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4304 int_regs++;
4305 else if (gr_size > UNITS_PER_WORD)
4306 int_regs += gr_size / UNITS_PER_WORD;
4308 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
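/* Worked example (assuming a prototyped call with no slots used yet):
   struct { double d[3]; } is a DFmode HFA of 24 bytes, so the loop above
   builds (parallel [(reg:DF f8) 0, (reg:DF f9) 8, (reg:DF f10) 16]) and
   nothing spills over into the general registers.  */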
4311 /* On OpenVMS a variable (unnamed) argument is passed in either Rn or Fn. */
4312 else if (TARGET_ABI_OPEN_VMS && named == 0)
4314 if (FLOAT_MODE_P (mode))
4315 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4316 else
4317 return gen_rtx_REG (mode, basereg + cum->words);
4320 /* Integral values and aggregates go in general registers. If we have run out of
4321 FR registers, then FP values must also go in general registers. This can
4322 happen when we have a SFmode HFA. */
4323 else if (mode == TFmode || mode == TCmode
4324 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4326 int byte_size = ((mode == BLKmode)
4327 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4328 if (BYTES_BIG_ENDIAN
4329 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4330 && byte_size < UNITS_PER_WORD
4331 && byte_size > 0)
4333 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4334 gen_rtx_REG (DImode,
4335 (basereg + cum->words
4336 + offset)),
4337 const0_rtx);
4338 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4340 else
4341 return gen_rtx_REG (mode, basereg + cum->words + offset);
4345 /* If there is a prototype, then FP values go in a FR register when
4346 named, and in a GR register when unnamed. */
4347 else if (cum->prototype)
4349 if (named)
4350 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4351 /* In big-endian mode, an anonymous SFmode value must be represented
4352 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4353 the value into the high half of the general register. */
4354 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4355 return gen_rtx_PARALLEL (mode,
4356 gen_rtvec (1,
4357 gen_rtx_EXPR_LIST (VOIDmode,
4358 gen_rtx_REG (DImode, basereg + cum->words + offset),
4359 const0_rtx)));
4360 else
4361 return gen_rtx_REG (mode, basereg + cum->words + offset);
4363 /* If there is no prototype, then FP values go in both FR and GR
4364 registers. */
4365 else
4367 /* See comment above. */
4368 enum machine_mode inner_mode =
4369 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4371 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4372 gen_rtx_REG (mode, (FR_ARG_FIRST
4373 + cum->fp_regs)),
4374 const0_rtx);
4375 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4376 gen_rtx_REG (inner_mode,
4377 (basereg + cum->words
4378 + offset)),
4379 const0_rtx);
4381 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4385 /* Return the number of bytes, at the beginning of the argument, that must be
4386 put in registers. 0 means the argument is entirely in registers or entirely
4387 in memory. */
4389 static int
4390 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4391 tree type, bool named ATTRIBUTE_UNUSED)
4393 int words = ia64_function_arg_words (type, mode);
4394 int offset = ia64_function_arg_offset (cum, type, words);
4396 /* If all argument slots are used, then it must go on the stack. */
4397 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4398 return 0;
4400 /* It doesn't matter whether the argument goes in FR or GR regs. If
4401 it fits within the 8 argument slots, then it goes entirely in
4402 registers. If it extends past the last argument slot, then the rest
4403 goes on the stack. */
4405 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4406 return 0;
4408 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
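/* E.g. (illustrative), with six of the eight slots already used, a 32-byte
   aggregate gets 2 * UNITS_PER_WORD == 16 bytes in the last two argument
   registers and the remaining 16 bytes on the stack.  */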
4411 /* Return ivms_arg_type based on machine_mode. */
4413 static enum ivms_arg_type
4414 ia64_arg_type (enum machine_mode mode)
4416 switch (mode)
4418 case SFmode:
4419 return FS;
4420 case DFmode:
4421 return FT;
4422 default:
4423 return I64;
4427 /* Update CUM to point after this argument. This is patterned after
4428 ia64_function_arg. */
4430 void
4431 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4432 tree type, int named)
4434 int words = ia64_function_arg_words (type, mode);
4435 int offset = ia64_function_arg_offset (cum, type, words);
4436 enum machine_mode hfa_mode = VOIDmode;
4438 /* If all arg slots are already full, then there is nothing to do. */
4439 if (cum->words >= MAX_ARGUMENT_SLOTS)
4441 cum->words += words + offset;
4442 return;
4445 cum->atypes[cum->words] = ia64_arg_type (mode);
4446 cum->words += words + offset;
4448 /* Check for and handle homogeneous FP aggregates. */
4449 if (type)
4450 hfa_mode = hfa_element_mode (type, 0);
4452 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4453 and unprototyped hfas are passed specially. */
4454 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4456 int fp_regs = cum->fp_regs;
4457 /* This is the original value of cum->words + offset. */
4458 int int_regs = cum->words - words;
4459 int hfa_size = GET_MODE_SIZE (hfa_mode);
4460 int byte_size;
4461 int args_byte_size;
4463 /* If prototyped, pass it in FR regs then GR regs.
4464 If not prototyped, pass it in both FR and GR regs.
4466 If this is an SFmode aggregate, then it is possible to run out of
4467 FR regs while GR regs are still left. In that case, we pass the
4468 remaining part in the GR regs. */
4470 /* Fill the FP regs. We do this always. We stop if we reach the end
4471 of the argument, the last FP register, or the last argument slot. */
4473 byte_size = ((mode == BLKmode)
4474 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4475 args_byte_size = int_regs * UNITS_PER_WORD;
4476 offset = 0;
4477 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4478 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4480 offset += hfa_size;
4481 args_byte_size += hfa_size;
4482 fp_regs++;
4485 cum->fp_regs = fp_regs;
4488 /* On OpenVMS a variable (unnamed) argument is passed in either Rn or Fn. */
4489 else if (TARGET_ABI_OPEN_VMS && named == 0)
4491 cum->int_regs = cum->words;
4492 cum->fp_regs = cum->words;
4495 /* Integral values and aggregates go in general registers. So do TFmode FP values.
4496 If we have run out of FR registers, then other FP values must also go in
4497 general registers. This can happen when we have a SFmode HFA. */
4498 else if (mode == TFmode || mode == TCmode
4499 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4500 cum->int_regs = cum->words;
4502 /* If there is a prototype, then FP values go in a FR register when
4503 named, and in a GR register when unnamed. */
4504 else if (cum->prototype)
4506 if (! named)
4507 cum->int_regs = cum->words;
4508 else
4509 /* ??? Complex types should not reach here. */
4510 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4512 /* If there is no prototype, then FP values go in both FR and GR
4513 registers. */
4514 else
4516 /* ??? Complex types should not reach here. */
4517 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4518 cum->int_regs = cum->words;
4522 /* Arguments with alignment larger than 8 bytes start at the next even
4523 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4524 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4527 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4530 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4531 return PARM_BOUNDARY * 2;
4533 if (type)
4535 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4536 return PARM_BOUNDARY * 2;
4537 else
4538 return PARM_BOUNDARY;
4541 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4542 return PARM_BOUNDARY * 2;
4543 else
4544 return PARM_BOUNDARY;
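/* E.g., assuming PARM_BOUNDARY is 64 here, a 16-byte-aligned aggregate or
   an ILP32 HP-UX TFmode argument gets a 128-bit boundary, everything else
   the normal 64-bit one.  */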
4547 /* True if it is OK to do sibling call optimization for the specified
4548 call expression EXP. DECL will be the called function, or NULL if
4549 this is an indirect call. */
4550 static bool
4551 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4553 /* We can't perform a sibcall if the current function has the syscall_linkage
4554 attribute. */
4555 if (lookup_attribute ("syscall_linkage",
4556 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4557 return false;
4559 /* We must always return with our current GP. This means we can
4560 only sibcall to functions defined in the current module unless
4561 TARGET_CONST_GP is set to true. */
4562 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4566 /* Implement va_arg. */
4568 static tree
4569 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4570 gimple_seq *post_p)
4572 /* Variable sized types are passed by reference. */
4573 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4575 tree ptrtype = build_pointer_type (type);
4576 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4577 return build_va_arg_indirect_ref (addr);
4580 /* Aggregate arguments with alignment larger than 8 bytes start at
4581 the next even boundary. Integer and floating point arguments
4582 do so if they are larger than 8 bytes, whether or not they are
4583 also aligned larger than 8 bytes. */
4584 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4585 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4587 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4588 size_int (2 * UNITS_PER_WORD - 1));
4589 t = fold_convert (sizetype, t);
4590 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4591 size_int (-2 * UNITS_PER_WORD));
4592 t = fold_convert (TREE_TYPE (valist), t);
4593 gimplify_assign (unshare_expr (valist), t, pre_p);
4596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
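/* The adjustment above amounts to valist = (valist + 15) & -16, i.e. the
   va_list pointer is rounded up to the next 16-byte boundary before the
   standard va_arg handling takes over.  */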
4599 /* Return true if the function return value is returned in memory, false if
4600 it is in a register. */
4602 static bool
4603 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4605 enum machine_mode mode;
4606 enum machine_mode hfa_mode;
4607 HOST_WIDE_INT byte_size;
4609 mode = TYPE_MODE (valtype);
4610 byte_size = GET_MODE_SIZE (mode);
4611 if (mode == BLKmode)
4613 byte_size = int_size_in_bytes (valtype);
4614 if (byte_size < 0)
4615 return true;
4618 /* HFAs with up to 8 elements are returned in the FP argument registers. */
4620 hfa_mode = hfa_element_mode (valtype, 0);
4621 if (hfa_mode != VOIDmode)
4623 int hfa_size = GET_MODE_SIZE (hfa_mode);
4625 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4626 return true;
4627 else
4628 return false;
4630 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4631 return true;
4632 else
4633 return false;
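/* Illustration: an HFA with more than eight elements is returned in memory,
   as is any other aggregate larger than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes; an HFA of up to eight floats or doubles comes back in f8 upwards
   (see ia64_function_value below).  */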
4636 /* Return rtx for register that holds the function return value. */
4639 ia64_function_value (const_tree valtype, const_tree func)
4641 enum machine_mode mode;
4642 enum machine_mode hfa_mode;
4643 int unsignedp;
4645 mode = TYPE_MODE (valtype);
4646 hfa_mode = hfa_element_mode (valtype, 0);
4648 if (hfa_mode != VOIDmode)
4650 rtx loc[8];
4651 int i;
4652 int hfa_size;
4653 int byte_size;
4654 int offset;
4656 hfa_size = GET_MODE_SIZE (hfa_mode);
4657 byte_size = ((mode == BLKmode)
4658 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4659 offset = 0;
4660 for (i = 0; offset < byte_size; i++)
4662 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4663 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4664 GEN_INT (offset));
4665 offset += hfa_size;
4667 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4669 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4670 return gen_rtx_REG (mode, FR_ARG_FIRST);
4671 else
4673 bool need_parallel = false;
4675 /* In big-endian mode, we need to manage the layout of aggregates
4676 in the registers so that we get the bits properly aligned in
4677 the highpart of the registers. */
4678 if (BYTES_BIG_ENDIAN
4679 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4680 need_parallel = true;
4682 /* Something like struct S { long double x; char a[0] } is not an
4683 HFA structure, and therefore doesn't go in fp registers. But
4684 the middle-end will give it XFmode anyway, and XFmode values
4685 don't normally fit in integer registers. So we need to smuggle
4686 the value inside a parallel. */
4687 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4688 need_parallel = true;
4690 if (need_parallel)
4692 rtx loc[8];
4693 int offset;
4694 int bytesize;
4695 int i;
4697 offset = 0;
4698 bytesize = int_size_in_bytes (valtype);
4699 /* An empty PARALLEL is invalid here, but the return value
4700 doesn't matter for empty structs. */
4701 if (bytesize == 0)
4702 return gen_rtx_REG (mode, GR_RET_FIRST);
4703 for (i = 0; offset < bytesize; i++)
4705 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4706 gen_rtx_REG (DImode,
4707 GR_RET_FIRST + i),
4708 GEN_INT (offset));
4709 offset += UNITS_PER_WORD;
4711 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4714 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4715 func ? TREE_TYPE (func) : NULL_TREE,
4716 true);
4718 return gen_rtx_REG (mode, GR_RET_FIRST);
4722 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4723 We need to emit DTP-relative relocations. */
4725 static void
4726 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4728 gcc_assert (size == 4 || size == 8);
4729 if (size == 4)
4730 fputs ("\tdata4.ua\t@dtprel(", file);
4731 else
4732 fputs ("\tdata8.ua\t@dtprel(", file);
4733 output_addr_const (file, x);
4734 fputs (")", file);
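/* For a DImode operand this emits, e.g., "data8.ua @dtprel(sym)", leaving
   the DTP-relative offset of SYM to be filled in at link time.  */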
4737 /* Print a memory address as an operand to reference that memory location. */
4739 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4740 also call this from ia64_print_operand for memory addresses. */
4742 void
4743 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4744 rtx address ATTRIBUTE_UNUSED)
4748 /* Print an operand to an assembler instruction.
4749 C Swap and print a comparison operator.
4750 D Print an FP comparison operator.
4751 E Print 32 - constant, for SImode shifts as extract.
4752 e Print 64 - constant, for DImode rotates.
4753 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4754 a floating point register emitted normally.
4755 G A floating point constant.
4756 I Invert a predicate register by adding 1.
4757 J Select the proper predicate register for a condition.
4758 j Select the inverse predicate register for a condition.
4759 O Append .acq for volatile load.
4760 P Postincrement of a MEM.
4761 Q Append .rel for volatile store.
4762 R Print .s .d or nothing for a single, double or no truncation.
4763 S Shift amount for shladd instruction.
4764 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4765 for Intel assembler.
4766 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4767 for Intel assembler.
4768 X A pair of floating point registers.
4769 r Print register name, or constant 0 as r0. HP compatibility for
4770 Linux kernel.
4771 v Print vector constant value as an 8-byte integer value. */
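/* The '+' code (branch hint suffix .sptk/.dptk/.dpnt/.spnt derived from the
   REG_BR_PROB note) and the ',' code (qualifying predicate prefix such as
   "(p6) ") are also handled below, although they are not listed above.  */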
4773 void
4774 ia64_print_operand (FILE * file, rtx x, int code)
4776 const char *str;
4778 switch (code)
4780 case 0:
4781 /* Handled below. */
4782 break;
4784 case 'C':
4786 enum rtx_code c = swap_condition (GET_CODE (x));
4787 fputs (GET_RTX_NAME (c), file);
4788 return;
4791 case 'D':
4792 switch (GET_CODE (x))
4794 case NE:
4795 str = "neq";
4796 break;
4797 case UNORDERED:
4798 str = "unord";
4799 break;
4800 case ORDERED:
4801 str = "ord";
4802 break;
4803 case UNLT:
4804 str = "nge";
4805 break;
4806 case UNLE:
4807 str = "ngt";
4808 break;
4809 case UNGT:
4810 str = "nle";
4811 break;
4812 case UNGE:
4813 str = "nlt";
4814 break;
4815 default:
4816 str = GET_RTX_NAME (GET_CODE (x));
4817 break;
4819 fputs (str, file);
4820 return;
4822 case 'E':
4823 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4824 return;
4826 case 'e':
4827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4828 return;
4830 case 'F':
4831 if (x == CONST0_RTX (GET_MODE (x)))
4832 str = reg_names [FR_REG (0)];
4833 else if (x == CONST1_RTX (GET_MODE (x)))
4834 str = reg_names [FR_REG (1)];
4835 else
4837 gcc_assert (GET_CODE (x) == REG);
4838 str = reg_names [REGNO (x)];
4840 fputs (str, file);
4841 return;
4843 case 'G':
4845 long val[4];
4846 REAL_VALUE_TYPE rv;
4847 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4848 real_to_target (val, &rv, GET_MODE (x));
4849 if (GET_MODE (x) == SFmode)
4850 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4851 else if (GET_MODE (x) == DFmode)
4852 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4853 & 0xffffffff,
4854 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4855 & 0xffffffff);
4856 else
4857 output_operand_lossage ("invalid %%G mode");
4859 return;
4861 case 'I':
4862 fputs (reg_names [REGNO (x) + 1], file);
4863 return;
4865 case 'J':
4866 case 'j':
4868 unsigned int regno = REGNO (XEXP (x, 0));
4869 if (GET_CODE (x) == EQ)
4870 regno += 1;
4871 if (code == 'j')
4872 regno ^= 1;
4873 fputs (reg_names [regno], file);
4875 return;
4877 case 'O':
4878 if (MEM_VOLATILE_P (x))
4879 fputs(".acq", file);
4880 return;
4882 case 'P':
4884 HOST_WIDE_INT value;
4886 switch (GET_CODE (XEXP (x, 0)))
4888 default:
4889 return;
4891 case POST_MODIFY:
4892 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4893 if (GET_CODE (x) == CONST_INT)
4894 value = INTVAL (x);
4895 else
4897 gcc_assert (GET_CODE (x) == REG);
4898 fprintf (file, ", %s", reg_names[REGNO (x)]);
4899 return;
4901 break;
4903 case POST_INC:
4904 value = GET_MODE_SIZE (GET_MODE (x));
4905 break;
4907 case POST_DEC:
4908 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4909 break;
4912 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4913 return;
4916 case 'Q':
4917 if (MEM_VOLATILE_P (x))
4918 fputs(".rel", file);
4919 return;
4921 case 'R':
4922 if (x == CONST0_RTX (GET_MODE (x)))
4923 fputs(".s", file);
4924 else if (x == CONST1_RTX (GET_MODE (x)))
4925 fputs(".d", file);
4926 else if (x == CONST2_RTX (GET_MODE (x)))
4928 else
4929 output_operand_lossage ("invalid %%R value");
4930 return;
4932 case 'S':
4933 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4934 return;
4936 case 'T':
4937 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4939 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4940 return;
4942 break;
4944 case 'U':
4945 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4947 const char *prefix = "0x";
4948 if (INTVAL (x) & 0x80000000)
4950 fprintf (file, "0xffffffff");
4951 prefix = "";
4953 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4954 return;
4956 break;
4958 case 'X':
4960 unsigned int regno = REGNO (x);
4961 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4963 return;
4965 case 'r':
4966 /* If this operand is the constant zero, write it as register zero.
4967 Any register, zero, or CONST_INT value is OK here. */
4968 if (GET_CODE (x) == REG)
4969 fputs (reg_names[REGNO (x)], file);
4970 else if (x == CONST0_RTX (GET_MODE (x)))
4971 fputs ("r0", file);
4972 else if (GET_CODE (x) == CONST_INT)
4973 output_addr_const (file, x);
4974 else
4975 output_operand_lossage ("invalid %%r value");
4976 return;
4978 case 'v':
4979 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4980 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4981 break;
4983 case '+':
4985 const char *which;
4987 /* For conditional branches, returns or calls, substitute
4988 sptk, dptk, dpnt, or spnt for %s. */
4989 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4990 if (x)
4992 int pred_val = INTVAL (XEXP (x, 0));
4994 /* Guess top and bottom 10% statically predicted. */
4995 if (pred_val < REG_BR_PROB_BASE / 50
4996 && br_prob_note_reliable_p (x))
4997 which = ".spnt";
4998 else if (pred_val < REG_BR_PROB_BASE / 2)
4999 which = ".dpnt";
5000 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5001 || !br_prob_note_reliable_p (x))
5002 which = ".dptk";
5003 else
5004 which = ".sptk";
5006 else if (GET_CODE (current_output_insn) == CALL_INSN)
5007 which = ".sptk";
5008 else
5009 which = ".dptk";
5011 fputs (which, file);
5012 return;
5015 case ',':
5016 x = current_insn_predicate;
5017 if (x)
5019 unsigned int regno = REGNO (XEXP (x, 0));
5020 if (GET_CODE (x) == EQ)
5021 regno += 1;
5022 fprintf (file, "(%s) ", reg_names [regno]);
5024 return;
5026 default:
5027 output_operand_lossage ("ia64_print_operand: unknown code");
5028 return;
5031 switch (GET_CODE (x))
5033 /* This happens for the spill/restore instructions. */
5034 case POST_INC:
5035 case POST_DEC:
5036 case POST_MODIFY:
5037 x = XEXP (x, 0);
5038 /* ... fall through ... */
5040 case REG:
5041 fputs (reg_names [REGNO (x)], file);
5042 break;
5044 case MEM:
5046 rtx addr = XEXP (x, 0);
5047 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5048 addr = XEXP (addr, 0);
5049 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5050 break;
5053 default:
5054 output_addr_const (file, x);
5055 break;
5058 return;
5061 /* Compute a (partial) cost for rtx X. Return true if the complete
5062 cost has been computed, and false if subexpressions should be
5063 scanned. In either case, *TOTAL contains the cost result. */
5064 /* ??? This is incomplete. */
5066 static bool
5067 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5068 bool speed ATTRIBUTE_UNUSED)
5070 switch (code)
5072 case CONST_INT:
5073 switch (outer_code)
5075 case SET:
5076 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5077 return true;
5078 case PLUS:
5079 if (satisfies_constraint_I (x))
5080 *total = 0;
5081 else if (satisfies_constraint_J (x))
5082 *total = 1;
5083 else
5084 *total = COSTS_N_INSNS (1);
5085 return true;
5086 default:
5087 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5088 *total = 0;
5089 else
5090 *total = COSTS_N_INSNS (1);
5091 return true;
5094 case CONST_DOUBLE:
5095 *total = COSTS_N_INSNS (1);
5096 return true;
5098 case CONST:
5099 case SYMBOL_REF:
5100 case LABEL_REF:
5101 *total = COSTS_N_INSNS (3);
5102 return true;
5104 case MULT:
5105 /* For multiplies wider than HImode, we have to go to the FPU,
5106 which normally involves copies. Plus there's the latency
5107 of the multiply itself, and the latency of the instructions to
5108 transfer integer regs to FP regs. */
5109 /* ??? Check for FP mode. */
5110 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5111 *total = COSTS_N_INSNS (10);
5112 else
5113 *total = COSTS_N_INSNS (2);
5114 return true;
5116 case PLUS:
5117 case MINUS:
5118 case ASHIFT:
5119 case ASHIFTRT:
5120 case LSHIFTRT:
5121 *total = COSTS_N_INSNS (1);
5122 return true;
5124 case DIV:
5125 case UDIV:
5126 case MOD:
5127 case UMOD:
5128 /* We make divide expensive, so that divide-by-constant will be
5129 optimized to a multiply. */
5130 *total = COSTS_N_INSNS (60);
5131 return true;
5133 default:
5134 return false;
5138 /* Calculate the cost of moving data from a register in class FROM to
5139 one in class TO, using MODE. */
5142 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5143 enum reg_class to)
5145 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5146 if (to == ADDL_REGS)
5147 to = GR_REGS;
5148 if (from == ADDL_REGS)
5149 from = GR_REGS;
5151 /* All costs are symmetric, so reduce cases by putting the
5152 lower number class as the destination. */
5153 if (from < to)
5155 enum reg_class tmp = to;
5156 to = from, from = tmp;
5159 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5160 so that we get secondary memory reloads. Between FR_REGS,
5161 we have to make this at least as expensive as MEMORY_MOVE_COST
5162 to avoid spectacularly poor register class preferencing. */
5163 if (mode == XFmode || mode == RFmode)
5165 if (to != GR_REGS || from != GR_REGS)
5166 return MEMORY_MOVE_COST (mode, to, 0);
5167 else
5168 return 3;
5171 switch (to)
5173 case PR_REGS:
5174 /* Moving between PR registers takes two insns. */
5175 if (from == PR_REGS)
5176 return 3;
5177 /* Moving between PR and anything but GR is impossible. */
5178 if (from != GR_REGS)
5179 return MEMORY_MOVE_COST (mode, to, 0);
5180 break;
5182 case BR_REGS:
5183 /* Moving between BR and anything but GR is impossible. */
5184 if (from != GR_REGS && from != GR_AND_BR_REGS)
5185 return MEMORY_MOVE_COST (mode, to, 0);
5186 break;
5188 case AR_I_REGS:
5189 case AR_M_REGS:
5190 /* Moving between AR and anything but GR is impossible. */
5191 if (from != GR_REGS)
5192 return MEMORY_MOVE_COST (mode, to, 0);
5193 break;
5195 case GR_REGS:
5196 case FR_REGS:
5197 case FP_REGS:
5198 case GR_AND_FR_REGS:
5199 case GR_AND_BR_REGS:
5200 case ALL_REGS:
5201 break;
5203 default:
5204 gcc_unreachable ();
5207 return 2;
5210 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
5211 to use when copying X into that class. */
5213 enum reg_class
5214 ia64_preferred_reload_class (rtx x, enum reg_class rclass)
5216 switch (rclass)
5218 case FR_REGS:
5219 case FP_REGS:
5220 /* Don't allow volatile mem reloads into floating point registers.
5221 This is defined to force reload to choose the r/m case instead
5222 of the f/f case when reloading (set (reg fX) (mem/v)). */
5223 if (MEM_P (x) && MEM_VOLATILE_P (x))
5224 return NO_REGS;
5226 /* Force all unrecognized constants into the constant pool. */
5227 if (CONSTANT_P (x))
5228 return NO_REGS;
5229 break;
5231 case AR_M_REGS:
5232 case AR_I_REGS:
5233 if (!OBJECT_P (x))
5234 return NO_REGS;
5235 break;
5237 default:
5238 break;
5241 return rclass;
5244 /* This function returns the register class required for a secondary
5245 register when copying between one of the registers in RCLASS, and X,
5246 using MODE. A return value of NO_REGS means that no secondary register
5247 is required. */
5249 enum reg_class
5250 ia64_secondary_reload_class (enum reg_class rclass,
5251 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5253 int regno = -1;
5255 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5256 regno = true_regnum (x);
5258 switch (rclass)
5260 case BR_REGS:
5261 case AR_M_REGS:
5262 case AR_I_REGS:
5263 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5264 interaction. We end up with two pseudos with overlapping lifetimes
5265 both of which are equiv to the same constant, and both of which need
5266 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5267 changes depending on the path length, which means the qty_first_reg
5268 check in make_regs_eqv can give different answers at different times.
5269 At some point I'll probably need a reload_indi pattern to handle
5270 this.
5272 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5273 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5274 non-general registers for good measure. */
5275 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5276 return GR_REGS;
5278 /* This is needed if a pseudo used as a call_operand gets spilled to a
5279 stack slot. */
5280 if (GET_CODE (x) == MEM)
5281 return GR_REGS;
5282 break;
5284 case FR_REGS:
5285 case FP_REGS:
5286 /* Need to go through general registers to get to other class regs. */
5287 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5288 return GR_REGS;
5290 /* This can happen when a paradoxical subreg is an operand to the
5291 muldi3 pattern. */
5292 /* ??? This shouldn't be necessary after instruction scheduling is
5293 enabled, because paradoxical subregs are not accepted by
5294 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5295 stop the paradoxical subreg stupidity in the *_operand functions
5296 in recog.c. */
5297 if (GET_CODE (x) == MEM
5298 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5299 || GET_MODE (x) == QImode))
5300 return GR_REGS;
5302 /* This can happen because of the ior/and/etc patterns that accept FP
5303 registers as operands. If the third operand is a constant, then it
5304 needs to be reloaded into a FP register. */
5305 if (GET_CODE (x) == CONST_INT)
5306 return GR_REGS;
5308 /* This can happen because of register elimination in a muldi3 insn.
5309 E.g. `26107 * (unsigned long)&u'. */
5310 if (GET_CODE (x) == PLUS)
5311 return GR_REGS;
5312 break;
5314 case PR_REGS:
5315 /* ??? This happens if we cse/gcse a BImode value across a call,
5316 and the function has a nonlocal goto. This is because global
5317 does not allocate call crossing pseudos to hard registers when
5318 crtl->has_nonlocal_goto is true. This is relatively
5319 common for C++ programs that use exceptions. To reproduce,
5320 return NO_REGS and compile libstdc++. */
5321 if (GET_CODE (x) == MEM)
5322 return GR_REGS;
5324 /* This can happen when we take a BImode subreg of a DImode value,
5325 and that DImode value winds up in some non-GR register. */
5326 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5327 return GR_REGS;
5328 break;
5330 default:
5331 break;
5334 return NO_REGS;
5338 /* Implement targetm.unspec_may_trap_p hook. */
5339 static int
5340 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5342 if (GET_CODE (x) == UNSPEC)
5344 switch (XINT (x, 1))
5346 case UNSPEC_LDA:
5347 case UNSPEC_LDS:
5348 case UNSPEC_LDSA:
5349 case UNSPEC_LDCCLR:
5350 case UNSPEC_CHKACLR:
5351 case UNSPEC_CHKS:
5352 /* These unspecs are just wrappers. */
5353 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5357 return default_unspec_may_trap_p (x, flags);
5361 /* Parse the -mfixed-range= option string. */
5363 static void
5364 fix_range (const char *const_str)
5366 int i, first, last;
5367 char *str, *dash, *comma;
5369 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5370 REG2 are either register names or register numbers. The effect
5371 of this option is to mark the registers in the range from REG1 to
5372 REG2 as ``fixed'' so they won't be used by the compiler. This is
5373 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5375 i = strlen (const_str);
5376 str = (char *) alloca (i + 1);
5377 memcpy (str, const_str, i + 1);
5379 while (1)
5381 dash = strchr (str, '-');
5382 if (!dash)
5384 warning (0, "value of -mfixed-range must have form REG1-REG2");
5385 return;
5387 *dash = '\0';
5389 comma = strchr (dash + 1, ',');
5390 if (comma)
5391 *comma = '\0';
5393 first = decode_reg_name (str);
5394 if (first < 0)
5396 warning (0, "unknown register name: %s", str);
5397 return;
5400 last = decode_reg_name (dash + 1);
5401 if (last < 0)
5403 warning (0, "unknown register name: %s", dash + 1);
5404 return;
5407 *dash = '-';
5409 if (first > last)
5411 warning (0, "%s-%s is an empty range", str, dash + 1);
5412 return;
5415 for (i = first; i <= last; ++i)
5416 fixed_regs[i] = call_used_regs[i] = 1;
5418 if (!comma)
5419 break;
5421 *comma = ',';
5422 str = comma + 1;
5426 /* Implement TARGET_HANDLE_OPTION. */
5428 static bool
5429 ia64_handle_option (size_t code, const char *arg, int value)
5431 switch (code)
5433 case OPT_mfixed_range_:
5434 fix_range (arg);
5435 return true;
5437 case OPT_mtls_size_:
5438 if (value != 14 && value != 22 && value != 64)
5439 error ("bad value %<%s%> for -mtls-size= switch", arg);
5440 return true;
5442 case OPT_mtune_:
5444 static struct pta
5446 const char *name; /* processor name or nickname. */
5447 enum processor_type processor;
5449 const processor_alias_table[] =
5451 {"itanium2", PROCESSOR_ITANIUM2},
5452 {"mckinley", PROCESSOR_ITANIUM2},
5454 int const pta_size = ARRAY_SIZE (processor_alias_table);
5455 int i;
5457 for (i = 0; i < pta_size; i++)
5458 if (!strcmp (arg, processor_alias_table[i].name))
5460 ia64_tune = processor_alias_table[i].processor;
5461 break;
5463 if (i == pta_size)
5464 error ("bad value %<%s%> for -mtune= switch", arg);
5465 return true;
5468 default:
5469 return true;
5473 /* Implement OVERRIDE_OPTIONS. */
5475 void
5476 ia64_override_options (void)
5478 if (TARGET_AUTO_PIC)
5479 target_flags |= MASK_CONST_GP;
5481 /* Numerous experiments show that IRA-based loop pressure
5482 calculation works better for RTL loop invariant motion on targets
5483 with enough (>= 32) registers. It is an expensive optimization,
5484 so it is enabled only when optimizing for peak performance. */
5485 if (optimize >= 3)
5486 flag_ira_loop_pressure = 1;
5489 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5491 init_machine_status = ia64_init_machine_status;
5493 if (align_functions <= 0)
5494 align_functions = 64;
5495 if (align_loops <= 0)
5496 align_loops = 32;
5497 if (TARGET_ABI_OPEN_VMS)
5498 flag_no_common = 1;
5500 ia64_override_options_after_change();
5503 /* Implement targetm.override_options_after_change. */
5505 static void
5506 ia64_override_options_after_change (void)
5508 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5509 flag_schedule_insns_after_reload = 0;
5511 if (optimize >= 3
5512 && ! sel_sched_switch_set)
5514 flag_selective_scheduling2 = 1;
5515 flag_sel_sched_pipelining = 1;
5517 if (mflag_sched_control_spec == 2)
5519 /* Control speculation is on by default for the selective scheduler,
5520 but not for the Haifa scheduler. */
5521 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5523 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5525 /* FIXME: remove this once we implement breaking auto-inc insns as
5526 a transformation. */
5527 flag_auto_inc_dec = 0;
5531 /* Initialize the record of emitted frame related registers. */
5533 void ia64_init_expanders (void)
5535 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5538 static struct machine_function *
5539 ia64_init_machine_status (void)
5541 return GGC_CNEW (struct machine_function);
5544 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5545 static enum attr_type ia64_safe_type (rtx);
5547 static enum attr_itanium_class
5548 ia64_safe_itanium_class (rtx insn)
5550 if (recog_memoized (insn) >= 0)
5551 return get_attr_itanium_class (insn);
5552 else if (DEBUG_INSN_P (insn))
5553 return ITANIUM_CLASS_IGNORE;
5554 else
5555 return ITANIUM_CLASS_UNKNOWN;
5558 static enum attr_type
5559 ia64_safe_type (rtx insn)
5561 if (recog_memoized (insn) >= 0)
5562 return get_attr_type (insn);
5563 else
5564 return TYPE_UNKNOWN;
5567 /* The following collection of routines emit instruction group stop bits as
5568 necessary to avoid dependencies. */
5570 /* Need to track some additional registers as far as serialization is
5571 concerned so we can properly handle br.call and br.ret. We could
5572 make these registers visible to gcc, but since these registers are
5573 never explicitly used in gcc generated code, it seems wasteful to
5574 do so (plus it would make the call and return patterns needlessly
5575 complex). */
5576 #define REG_RP (BR_REG (0))
5577 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5578 /* This is used for volatile asms which may require a stop bit immediately
5579 before and after them. */
5580 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5581 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5582 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5584 /* For each register, we keep track of how it has been written in the
5585 current instruction group.
5587 If a register is written unconditionally (no qualifying predicate),
5588 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5590 If a register is written if its qualifying predicate P is true, we
5591 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5592 may be written again by the complement of P (P^1) and when this happens,
5593 WRITE_COUNT gets set to 2.
5595 The result of this is that whenever an insn attempts to write a register
5596 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5598 If a predicate register is written by a floating-point insn, we set
5599 WRITTEN_BY_FP to true.
5601 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5602 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
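/* Illustration: within one group, (p6) mov r14 = ... followed by
   (p7) mov r14 = ... needs no stop bit when p6/p7 are a complementary
   even/odd pair (WRITE_COUNT goes from 1 to 2); any further write to r14
   in the same group then forces a stop bit.  */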
5604 #if GCC_VERSION >= 4000
5605 #define RWS_FIELD_TYPE __extension__ unsigned short
5606 #else
5607 #define RWS_FIELD_TYPE unsigned int
5608 #endif
5609 struct reg_write_state
5611 RWS_FIELD_TYPE write_count : 2;
5612 RWS_FIELD_TYPE first_pred : 10;
5613 RWS_FIELD_TYPE written_by_fp : 1;
5614 RWS_FIELD_TYPE written_by_and : 1;
5615 RWS_FIELD_TYPE written_by_or : 1;
5618 /* Cumulative info for the current instruction group. */
5619 struct reg_write_state rws_sum[NUM_REGS];
5620 #ifdef ENABLE_CHECKING
5621 /* Bitmap whether a register has been written in the current insn. */
5622 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5623 / HOST_BITS_PER_WIDEST_FAST_INT];
5625 static inline void
5626 rws_insn_set (int regno)
5628 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5629 SET_HARD_REG_BIT (rws_insn, regno);
5632 static inline int
5633 rws_insn_test (int regno)
5635 return TEST_HARD_REG_BIT (rws_insn, regno);
5637 #else
5638 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5639 unsigned char rws_insn[2];
5641 static inline void
5642 rws_insn_set (int regno)
5644 if (regno == REG_AR_CFM)
5645 rws_insn[0] = 1;
5646 else if (regno == REG_VOLATILE)
5647 rws_insn[1] = 1;
5650 static inline int
5651 rws_insn_test (int regno)
5653 if (regno == REG_AR_CFM)
5654 return rws_insn[0];
5655 if (regno == REG_VOLATILE)
5656 return rws_insn[1];
5657 return 0;
5659 #endif
5661 /* Indicates whether this is the first instruction after a stop bit,
5662 in which case we don't need another stop bit. Without this,
5663 ia64_variable_issue will die when scheduling an alloc. */
5664 static int first_instruction;
5666 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5667 RTL for one instruction. */
5668 struct reg_flags
5670 unsigned int is_write : 1; /* Is register being written? */
5671 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5672 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5673 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5674 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5675 unsigned int is_sibcall : 1; /* Is this a sibling call (as opposed to a normal call)? */
5678 static void rws_update (int, struct reg_flags, int);
5679 static int rws_access_regno (int, struct reg_flags, int);
5680 static int rws_access_reg (rtx, struct reg_flags, int);
5681 static void update_set_flags (rtx, struct reg_flags *);
5682 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5683 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5684 static void init_insn_group_barriers (void);
5685 static int group_barrier_needed (rtx);
5686 static int safe_group_barrier_needed (rtx);
5687 static int in_safe_group_barrier;
5689 /* Update *RWS for REGNO, which is being written by the current instruction,
5690 with predicate PRED, and associated register flags in FLAGS. */
5692 static void
5693 rws_update (int regno, struct reg_flags flags, int pred)
5695 if (pred)
5696 rws_sum[regno].write_count++;
5697 else
5698 rws_sum[regno].write_count = 2;
5699 rws_sum[regno].written_by_fp |= flags.is_fp;
5700 /* ??? Not tracking and/or across differing predicates. */
5701 rws_sum[regno].written_by_and = flags.is_and;
5702 rws_sum[regno].written_by_or = flags.is_or;
5703 rws_sum[regno].first_pred = pred;
5706 /* Handle an access to register REGNO of type FLAGS using predicate register
5707 PRED. Update rws_sum array. Return 1 if this access creates
5708 a dependency with an earlier instruction in the same group. */
5710 static int
5711 rws_access_regno (int regno, struct reg_flags flags, int pred)
5713 int need_barrier = 0;
5715 gcc_assert (regno < NUM_REGS);
5717 if (! PR_REGNO_P (regno))
5718 flags.is_and = flags.is_or = 0;
5720 if (flags.is_write)
5722 int write_count;
5724 rws_insn_set (regno);
5725 write_count = rws_sum[regno].write_count;
5727 switch (write_count)
5729 case 0:
5730 /* The register has not been written yet. */
5731 if (!in_safe_group_barrier)
5732 rws_update (regno, flags, pred);
5733 break;
5735 case 1:
5736 /* The register has been written via a predicate. If this is
5737 not a complementary predicate, then we need a barrier. */
5738 /* ??? This assumes that P and P+1 are always complementary
5739 predicates for P even. */
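/* For instance (illustration only): "cmp.eq p6, p7 = r8, r9" writes the
   even/odd pair p6/p7, so a second write guarded by the complement
   predicate passes the (first_pred ^ 1) test below and needs no barrier.  */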
5740 if (flags.is_and && rws_sum[regno].written_by_and)
5742 else if (flags.is_or && rws_sum[regno].written_by_or)
5744 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5745 need_barrier = 1;
5746 if (!in_safe_group_barrier)
5747 rws_update (regno, flags, pred);
5748 break;
5750 case 2:
5751 /* The register has been unconditionally written already. We
5752 need a barrier. */
5753 if (flags.is_and && rws_sum[regno].written_by_and)
5755 else if (flags.is_or && rws_sum[regno].written_by_or)
5757 else
5758 need_barrier = 1;
5759 if (!in_safe_group_barrier)
5761 rws_sum[regno].written_by_and = flags.is_and;
5762 rws_sum[regno].written_by_or = flags.is_or;
5764 break;
5766 default:
5767 gcc_unreachable ();
5770 else
5772 if (flags.is_branch)
5774 /* Branches have several RAW exceptions that allow us to avoid
5775 barriers. */
5777 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5778 /* RAW dependencies on branch regs are permissible as long
5779 as the writer is a non-branch instruction. Since we
5780 never generate code that uses a branch register written
5781 by a branch instruction, handling this case is
5782 easy. */
5783 return 0;
5785 if (REGNO_REG_CLASS (regno) == PR_REGS
5786 && ! rws_sum[regno].written_by_fp)
5787 /* The predicates of a branch are available within the
5788 same insn group as long as the predicate was written by
5789 something other than a floating-point instruction. */
5790 return 0;
5793 if (flags.is_and && rws_sum[regno].written_by_and)
5794 return 0;
5795 if (flags.is_or && rws_sum[regno].written_by_or)
5796 return 0;
5798 switch (rws_sum[regno].write_count)
5800 case 0:
5801 /* The register has not been written yet. */
5802 break;
5804 case 1:
5805 /* The register has been written via a predicate. If this is
5806 not a complementary predicate, then we need a barrier. */
5807 /* ??? This assumes that P and P+1 are always complementary
5808 predicates for P even. */
5809 if ((rws_sum[regno].first_pred ^ 1) != pred)
5810 need_barrier = 1;
5811 break;
5813 case 2:
5814 /* The register has been unconditionally written already. We
5815 need a barrier. */
5816 need_barrier = 1;
5817 break;
5819 default:
5820 gcc_unreachable ();
5824 return need_barrier;
5827 static int
5828 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5830 int regno = REGNO (reg);
5831 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5833 if (n == 1)
5834 return rws_access_regno (regno, flags, pred);
5835 else
5837 int need_barrier = 0;
5838 while (--n >= 0)
5839 need_barrier |= rws_access_regno (regno + n, flags, pred);
5840 return need_barrier;
5844 /* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
5845 to reflect the nature of its source (fp comparison, and.orcm, or.andcm). */
5847 static void
5848 update_set_flags (rtx x, struct reg_flags *pflags)
5850 rtx src = SET_SRC (x);
5852 switch (GET_CODE (src))
5854 case CALL:
5855 return;
5857 case IF_THEN_ELSE:
5858 /* There are four cases here:
5859 (1) The destination is (pc), in which case this is a branch,
5860 nothing here applies.
5861 (2) The destination is ar.lc, in which case this is a
5862 doloop_end_internal,
5863 (3) The destination is an fp register, in which case this is
5864 an fselect instruction.
5865 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5866 this is a check load.
5867 In all cases, nothing we do in this function applies. */
5868 return;
5870 default:
5871 if (COMPARISON_P (src)
5872 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5873 /* Set pflags->is_fp to 1 so that we know we're dealing
5874 with a floating point comparison when processing the
5875 destination of the SET. */
5876 pflags->is_fp = 1;
5878 /* Discover if this is a parallel comparison. We only handle
5879 and.orcm and or.andcm at present, since we must retain a
5880 strict inverse on the predicate pair. */
5881 else if (GET_CODE (src) == AND)
5882 pflags->is_and = 1;
5883 else if (GET_CODE (src) == IOR)
5884 pflags->is_or = 1;
5886 break;
5890 /* Subroutine of rtx_needs_barrier; this function determines whether the
5891 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5892 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5893 for this insn. */
5895 static int
5896 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5898 int need_barrier = 0;
5899 rtx dst;
5900 rtx src = SET_SRC (x);
5902 if (GET_CODE (src) == CALL)
5903 /* We don't need to worry about the result registers that
5904 get written by a subroutine call. */
5905 return rtx_needs_barrier (src, flags, pred);
5906 else if (SET_DEST (x) == pc_rtx)
5908 /* X is a conditional branch. */
5909 /* ??? This seems redundant, as the caller sets this bit for
5910 all JUMP_INSNs. */
5911 if (!ia64_spec_check_src_p (src))
5912 flags.is_branch = 1;
5913 return rtx_needs_barrier (src, flags, pred);
5916 if (ia64_spec_check_src_p (src))
5917 /* Avoid checking one register twice (in condition
5918 and in 'then' section) for ldc pattern. */
5920 gcc_assert (REG_P (XEXP (src, 2)));
5921 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5923 /* We process MEM below. */
5924 src = XEXP (src, 1);
5927 need_barrier |= rtx_needs_barrier (src, flags, pred);
5929 dst = SET_DEST (x);
5930 if (GET_CODE (dst) == ZERO_EXTRACT)
5932 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5933 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5935 return need_barrier;
5938 /* Handle an access to rtx X of type FLAGS using predicate register
5939 PRED. Return 1 if this access creates a dependency with an earlier
5940 instruction in the same group. */
5942 static int
5943 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5945 int i, j;
5946 int is_complemented = 0;
5947 int need_barrier = 0;
5948 const char *format_ptr;
5949 struct reg_flags new_flags;
5950 rtx cond;
5952 if (! x)
5953 return 0;
5955 new_flags = flags;
5957 switch (GET_CODE (x))
5959 case SET:
5960 update_set_flags (x, &new_flags);
5961 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5962 if (GET_CODE (SET_SRC (x)) != CALL)
5964 new_flags.is_write = 1;
5965 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5967 break;
5969 case CALL:
5970 new_flags.is_write = 0;
5971 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5973 /* Avoid multiple register writes, in case this is a pattern with
5974 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5975 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5977 new_flags.is_write = 1;
5978 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5979 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5980 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5982 break;
5984 case COND_EXEC:
5985 /* X is a predicated instruction. */
5987 cond = COND_EXEC_TEST (x);
5988 gcc_assert (!pred);
5989 need_barrier = rtx_needs_barrier (cond, flags, 0);
5991 if (GET_CODE (cond) == EQ)
5992 is_complemented = 1;
5993 cond = XEXP (cond, 0);
5994 gcc_assert (GET_CODE (cond) == REG
5995 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5996 pred = REGNO (cond);
5997 if (is_complemented)
5998 ++pred;
6000 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6001 return need_barrier;
6003 case CLOBBER:
6004 case USE:
6005 /* Clobber & use are for earlier compiler-phases only. */
6006 break;
6008 case ASM_OPERANDS:
6009 case ASM_INPUT:
6010 /* We always emit stop bits for traditional asms. We emit stop bits
6011 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6012 if (GET_CODE (x) != ASM_OPERANDS
6013 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6015 /* Avoid writing the register multiple times if we have multiple
6016 asm outputs. This avoids a failure in rws_access_reg. */
6017 if (! rws_insn_test (REG_VOLATILE))
6019 new_flags.is_write = 1;
6020 rws_access_regno (REG_VOLATILE, new_flags, pred);
6022 return 1;
6025 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6026 We cannot just fall through here since then we would be confused
6027 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6028 a traditional asm, unlike its normal usage. */
6030 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6031 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6032 need_barrier = 1;
6033 break;
6035 case PARALLEL:
6036 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6038 rtx pat = XVECEXP (x, 0, i);
6039 switch (GET_CODE (pat))
6041 case SET:
6042 update_set_flags (pat, &new_flags);
6043 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6044 break;
6046 case USE:
6047 case CALL:
6048 case ASM_OPERANDS:
6049 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6050 break;
6052 case CLOBBER:
6053 case RETURN:
6054 break;
6056 default:
6057 gcc_unreachable ();
6060 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6062 rtx pat = XVECEXP (x, 0, i);
6063 if (GET_CODE (pat) == SET)
6065 if (GET_CODE (SET_SRC (pat)) != CALL)
6067 new_flags.is_write = 1;
6068 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6069 pred);
6072 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6073 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6075 break;
6077 case SUBREG:
6078 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6079 break;
6080 case REG:
6081 if (REGNO (x) == AR_UNAT_REGNUM)
6083 for (i = 0; i < 64; ++i)
6084 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6086 else
6087 need_barrier = rws_access_reg (x, flags, pred);
6088 break;
6090 case MEM:
6091 /* Find the regs used in memory address computation. */
6092 new_flags.is_write = 0;
6093 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6094 break;
6096 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6097 case SYMBOL_REF: case LABEL_REF: case CONST:
6098 break;
6100 /* Operators with side-effects. */
6101 case POST_INC: case POST_DEC:
6102 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6104 new_flags.is_write = 0;
6105 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6106 new_flags.is_write = 1;
6107 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6108 break;
6110 case POST_MODIFY:
6111 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6113 new_flags.is_write = 0;
6114 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6115 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6116 new_flags.is_write = 1;
6117 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6118 break;
6120 /* Handle common unary and binary ops for efficiency. */
6121 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6122 case MOD: case UDIV: case UMOD: case AND: case IOR:
6123 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6124 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6125 case NE: case EQ: case GE: case GT: case LE:
6126 case LT: case GEU: case GTU: case LEU: case LTU:
6127 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6128 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6129 break;
6131 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6132 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6133 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6134 case SQRT: case FFS: case POPCOUNT:
6135 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6136 break;
6138 case VEC_SELECT:
6139 /* VEC_SELECT's second argument is a PARALLEL with integers that
6140 describe the elements selected. On ia64, those integers are
6141 always constants. Avoid walking the PARALLEL so that we don't
6142 get confused with "normal" parallels and then die. */
6143 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6144 break;
6146 case UNSPEC:
6147 switch (XINT (x, 1))
6149 case UNSPEC_LTOFF_DTPMOD:
6150 case UNSPEC_LTOFF_DTPREL:
6151 case UNSPEC_DTPREL:
6152 case UNSPEC_LTOFF_TPREL:
6153 case UNSPEC_TPREL:
6154 case UNSPEC_PRED_REL_MUTEX:
6155 case UNSPEC_PIC_CALL:
6156 case UNSPEC_MF:
6157 case UNSPEC_FETCHADD_ACQ:
6158 case UNSPEC_BSP_VALUE:
6159 case UNSPEC_FLUSHRS:
6160 case UNSPEC_BUNDLE_SELECTOR:
6161 break;
6163 case UNSPEC_GR_SPILL:
6164 case UNSPEC_GR_RESTORE:
6166 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6167 HOST_WIDE_INT bit = (offset >> 3) & 63;
6169 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6170 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6171 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6172 new_flags, pred);
6173 break;
6176 case UNSPEC_FR_SPILL:
6177 case UNSPEC_FR_RESTORE:
6178 case UNSPEC_GETF_EXP:
6179 case UNSPEC_SETF_EXP:
6180 case UNSPEC_ADDP4:
6181 case UNSPEC_FR_SQRT_RECIP_APPROX:
6182 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6183 case UNSPEC_LDA:
6184 case UNSPEC_LDS:
6185 case UNSPEC_LDS_A:
6186 case UNSPEC_LDSA:
6187 case UNSPEC_CHKACLR:
6188 case UNSPEC_CHKS:
6189 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6190 break;
6192 case UNSPEC_FR_RECIP_APPROX:
6193 case UNSPEC_SHRP:
6194 case UNSPEC_COPYSIGN:
6195 case UNSPEC_FR_RECIP_APPROX_RES:
6196 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6197 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6198 break;
6200 case UNSPEC_CMPXCHG_ACQ:
6201 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6202 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6203 break;
6205 default:
6206 gcc_unreachable ();
6208 break;
6210 case UNSPEC_VOLATILE:
6211 switch (XINT (x, 1))
6213 case UNSPECV_ALLOC:
6214 /* Alloc must always be the first instruction of a group.
6215 We force this by always returning true. */
6216 /* ??? We might get better scheduling if we explicitly check for
6217 input/local/output register dependencies, and modify the
6218 scheduler so that alloc is always reordered to the start of
6219 the current group. We could then eliminate all of the
6220 first_instruction code. */
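	/* For example (illustrative): "alloc r34 = ar.pfs, 2, 3, 2, 0" reads
	   ar.pfs (saving it into r34) and rewrites the CFM, which is exactly
	   what the two register accesses below record.  */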
6221 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6223 new_flags.is_write = 1;
6224 rws_access_regno (REG_AR_CFM, new_flags, pred);
6225 return 1;
6227 case UNSPECV_SET_BSP:
6228 need_barrier = 1;
6229 break;
6231 case UNSPECV_BLOCKAGE:
6232 case UNSPECV_INSN_GROUP_BARRIER:
6233 case UNSPECV_BREAK:
6234 case UNSPECV_PSAC_ALL:
6235 case UNSPECV_PSAC_NORMAL:
6236 return 0;
6238 default:
6239 gcc_unreachable ();
6241 break;
6243 case RETURN:
6244 new_flags.is_write = 0;
6245 need_barrier = rws_access_regno (REG_RP, flags, pred);
6246 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6248 new_flags.is_write = 1;
6249 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6250 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6251 break;
6253 default:
6254 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6255 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6256 switch (format_ptr[i])
6258 case '0': /* unused field */
6259 case 'i': /* integer */
6260 case 'n': /* note */
6261 case 'w': /* wide integer */
6262 case 's': /* pointer to string */
6263 case 'S': /* optional pointer to string */
6264 break;
6266 case 'e':
6267 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6268 need_barrier = 1;
6269 break;
6271 case 'E':
6272 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6273 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6274 need_barrier = 1;
6275 break;
6277 default:
6278 gcc_unreachable ();
6280 break;
6282 return need_barrier;
6285 /* Clear out the state for group_barrier_needed at the start of a
6286 sequence of insns. */
6288 static void
6289 init_insn_group_barriers (void)
6291 memset (rws_sum, 0, sizeof (rws_sum));
6292 first_instruction = 1;
6295 /* Given the current state, determine whether a group barrier (a stop bit) is
6296 necessary before INSN. Return nonzero if so. This modifies the state to
6297 include the effects of INSN as a side-effect. */
6299 static int
6300 group_barrier_needed (rtx insn)
6302 rtx pat;
6303 int need_barrier = 0;
6304 struct reg_flags flags;
6306 memset (&flags, 0, sizeof (flags));
6307 switch (GET_CODE (insn))
6309 case NOTE:
6310 case DEBUG_INSN:
6311 break;
6313 case BARRIER:
6314 /* A barrier doesn't imply an instruction group boundary. */
6315 break;
6317 case CODE_LABEL:
6318 memset (rws_insn, 0, sizeof (rws_insn));
6319 return 1;
6321 case CALL_INSN:
6322 flags.is_branch = 1;
6323 flags.is_sibcall = SIBLING_CALL_P (insn);
6324 memset (rws_insn, 0, sizeof (rws_insn));
6326 /* Don't bundle a call following another call. */
6327 if ((pat = prev_active_insn (insn))
6328 && GET_CODE (pat) == CALL_INSN)
6330 need_barrier = 1;
6331 break;
6334 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6335 break;
6337 case JUMP_INSN:
6338 if (!ia64_spec_check_p (insn))
6339 flags.is_branch = 1;
6341 /* Don't bundle a jump following a call. */
6342 if ((pat = prev_active_insn (insn))
6343 && GET_CODE (pat) == CALL_INSN)
6345 need_barrier = 1;
6346 break;
6348 /* FALLTHRU */
6350 case INSN:
6351 if (GET_CODE (PATTERN (insn)) == USE
6352 || GET_CODE (PATTERN (insn)) == CLOBBER)
6353 /* Don't care about USE and CLOBBER "insns"---those are used to
6354 indicate to the optimizer that it shouldn't get rid of
6355 certain operations. */
6356 break;
6358 pat = PATTERN (insn);
6360 /* Ug. Hack hacks hacked elsewhere. */
6361 switch (recog_memoized (insn))
6363 /* We play dependency tricks with the epilogue in order
6364 to get proper schedules. Undo this for dv analysis. */
6365 case CODE_FOR_epilogue_deallocate_stack:
6366 case CODE_FOR_prologue_allocate_stack:
6367 pat = XVECEXP (pat, 0, 0);
6368 break;
6370 /* The pattern we use for br.cloop confuses the code above.
6371 The second element of the vector is representative. */
6372 case CODE_FOR_doloop_end_internal:
6373 pat = XVECEXP (pat, 0, 1);
6374 break;
6376 /* Doesn't generate code. */
6377 case CODE_FOR_pred_rel_mutex:
6378 case CODE_FOR_prologue_use:
6379 return 0;
6381 default:
6382 break;
6385 memset (rws_insn, 0, sizeof (rws_insn));
6386 need_barrier = rtx_needs_barrier (pat, flags, 0);
6388 /* Check to see if the previous instruction was a volatile
6389 asm. */
6390 if (! need_barrier)
6391 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6393 break;
6395 default:
6396 gcc_unreachable ();
6399 if (first_instruction && INSN_P (insn)
6400 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6401 && GET_CODE (PATTERN (insn)) != USE
6402 && GET_CODE (PATTERN (insn)) != CLOBBER)
6404 need_barrier = 0;
6405 first_instruction = 0;
6408 return need_barrier;
6411 /* Like group_barrier_needed, but do not clobber the current state. */
6413 static int
6414 safe_group_barrier_needed (rtx insn)
6416 int saved_first_instruction;
6417 int t;
6419 saved_first_instruction = first_instruction;
6420 in_safe_group_barrier = 1;
6422 t = group_barrier_needed (insn);
6424 first_instruction = saved_first_instruction;
6425 in_safe_group_barrier = 0;
6427 return t;
6430 /* Scan the current function and insert stop bits as necessary to
6431 eliminate dependencies. This function assumes that a final
6432 instruction scheduling pass has been run which has already
6433 inserted most of the necessary stop bits. This function only
6434 inserts new ones at basic block boundaries, since these are
6435 invisible to the scheduler. */
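/* For example, when a register written near the end of one block is read
   just after a label, the read can be reached along several incoming edges;
   emitting the stop bit immediately before the most recent label, as the
   loop below does, is the conservative placement that covers all of them.  */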
6437 static void
6438 emit_insn_group_barriers (FILE *dump)
6440 rtx insn;
6441 rtx last_label = 0;
6442 int insns_since_last_label = 0;
6444 init_insn_group_barriers ();
6446 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6448 if (GET_CODE (insn) == CODE_LABEL)
6450 if (insns_since_last_label)
6451 last_label = insn;
6452 insns_since_last_label = 0;
6454 else if (GET_CODE (insn) == NOTE
6455 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6457 if (insns_since_last_label)
6458 last_label = insn;
6459 insns_since_last_label = 0;
6461 else if (GET_CODE (insn) == INSN
6462 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6463 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6465 init_insn_group_barriers ();
6466 last_label = 0;
6468 else if (NONDEBUG_INSN_P (insn))
6470 insns_since_last_label = 1;
6472 if (group_barrier_needed (insn))
6474 if (last_label)
6476 if (dump)
6477 fprintf (dump, "Emitting stop before label %d\n",
6478 INSN_UID (last_label));
6479 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6480 insn = last_label;
6482 init_insn_group_barriers ();
6483 last_label = 0;
6490 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6491 This function has to emit all necessary group barriers. */
6493 static void
6494 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6496 rtx insn;
6498 init_insn_group_barriers ();
6500 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6502 if (GET_CODE (insn) == BARRIER)
6504 rtx last = prev_active_insn (insn);
6506 if (! last)
6507 continue;
6508 if (GET_CODE (last) == JUMP_INSN
6509 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6510 last = prev_active_insn (last);
6511 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6512 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6514 init_insn_group_barriers ();
6516 else if (NONDEBUG_INSN_P (insn))
6518 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6519 init_insn_group_barriers ();
6520 else if (group_barrier_needed (insn))
6522 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6523 init_insn_group_barriers ();
6524 group_barrier_needed (insn);
6532 /* Instruction scheduling support. */
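/* Background for the table below: ia64 packs three instructions into a
   128-bit bundle whose template names the slot types: M (memory),
   I (integer), F (floating point), B (branch), and the L+X form used for
   long immediates.  ".mmi", for instance, is a memory/memory/integer
   bundle, while ".mlx" pairs an M slot with a 64-bit-immediate operation.  */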
6534 #define NR_BUNDLES 10
6536 /* A list of names of all available bundles. */
6538 static const char *bundle_name [NR_BUNDLES] =
6540 ".mii",
6541 ".mmi",
6542 ".mfi",
6543 ".mmf",
6544 #if NR_BUNDLES == 10
6545 ".bbb",
6546 ".mbb",
6547 #endif
6548 ".mib",
6549 ".mmb",
6550 ".mfb",
6551 ".mlx"
6554 /* Nonzero if we should insert stop bits into the schedule. */
6556 int ia64_final_schedule = 0;
6558 /* Codes of the corresponding queried units: */
6560 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6561 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6563 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6564 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6566 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6568 /* The following variable value is an insn group barrier. */
6570 static rtx dfa_stop_insn;
6572 /* The following variable value is the last issued insn. */
6574 static rtx last_scheduled_insn;
6576 /* The following variable value is a pointer to a DFA state used as
6577 a temporary variable. */
6579 static state_t temp_dfa_state = NULL;
6581 /* The following variable value is DFA state after issuing the last
6582 insn. */
6584 static state_t prev_cycle_state = NULL;
6586 /* The following array element values are TRUE if the corresponding
6587 insn requires a stop bit to be added before it. */
6589 static char *stops_p = NULL;
6591 /* The following variable is used to set up the above-mentioned array. */
6593 static int stop_before_p = 0;
6595 /* The following variable value is the allocated length of the
6596 `stops_p' array. */
6598 static int clocks_length;
6600 /* The following variable value is the number of data speculations in progress. */
6601 static int pending_data_specs = 0;
6603 /* Number of memory references on the current and three future processor cycles. */
6604 static char mem_ops_in_group[4];
6606 /* Number of the current processor cycle (from the scheduler's point of view). */
6607 static int current_cycle;
6609 static rtx ia64_single_set (rtx);
6610 static void ia64_emit_insn_before (rtx, rtx);
6612 /* Map a bundle number to its pseudo-op. */
6614 const char *
6615 get_bundle_name (int b)
6617 return bundle_name[b];
6621 /* Return the maximum number of instructions a cpu can issue. */
6623 static int
6624 ia64_issue_rate (void)
6626 return 6;
6629 /* Helper function - like single_set, but look inside COND_EXEC. */
6631 static rtx
6632 ia64_single_set (rtx insn)
6634 rtx x = PATTERN (insn), ret;
6635 if (GET_CODE (x) == COND_EXEC)
6636 x = COND_EXEC_CODE (x);
6637 if (GET_CODE (x) == SET)
6638 return x;
6640 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6641 Although they are not classical single sets, the second set is there only
6642 to protect the first from being moved past FP-relative stack accesses. */
6643 switch (recog_memoized (insn))
6645 case CODE_FOR_prologue_allocate_stack:
6646 case CODE_FOR_epilogue_deallocate_stack:
6647 ret = XVECEXP (x, 0, 0);
6648 break;
6650 default:
6651 ret = single_set_2 (insn, x);
6652 break;
6655 return ret;
6658 /* Adjust the cost of a scheduling dependency.
6659 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6660 COST is the current cost, DW is the dependency weakness. */
6661 static int
6662 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6664 enum reg_note dep_type = (enum reg_note) dep_type1;
6665 enum attr_itanium_class dep_class;
6666 enum attr_itanium_class insn_class;
6668 insn_class = ia64_safe_itanium_class (insn);
6669 dep_class = ia64_safe_itanium_class (dep_insn);
6671 /* Treat true memory dependencies separately. Ignore apparent true
6672 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6673 if (dep_type == REG_DEP_TRUE
6674 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6675 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6676 return 0;
6678 if (dw == MIN_DEP_WEAK)
6679 /* Store and load are likely to alias, use higher cost to avoid stall. */
6680 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6681 else if (dw > MIN_DEP_WEAK)
6683 /* Store and load are less likely to alias. */
6684 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6685 /* Assume there will be no cache conflict for floating-point data.
6686 For integer data, L1 conflict penalty is huge (17 cycles), so we
6687 never assume it will not cause a conflict. */
6688 return 0;
6689 else
6690 return cost;
6693 if (dep_type != REG_DEP_OUTPUT)
6694 return cost;
6696 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6697 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6698 return 0;
6700 return cost;
6703 /* Like emit_insn_before, but skip cycle_display notes.
6704 ??? When cycle display notes are implemented, update this. */
6706 static void
6707 ia64_emit_insn_before (rtx insn, rtx before)
6709 emit_insn_before (insn, before);
6712 /* The following function marks insns that produce addresses for load
6713 and store insns. Such insns will be placed into M slots because this
6714 decreases latency for Itanium1 (see function
6715 `ia64_produce_address_p' and the DFA descriptions). */
6717 static void
6718 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6720 rtx insn, next, next_tail;
6722 /* Before reload, which_alternative is not set, which means that
6723 ia64_safe_itanium_class will produce wrong results for (at least)
6724 move instructions. */
6725 if (!reload_completed)
6726 return;
6728 next_tail = NEXT_INSN (tail);
6729 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6730 if (INSN_P (insn))
6731 insn->call = 0;
6732 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6733 if (INSN_P (insn)
6734 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6736 sd_iterator_def sd_it;
6737 dep_t dep;
6738 bool has_mem_op_consumer_p = false;
6740 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6742 enum attr_itanium_class c;
6744 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6745 continue;
6747 next = DEP_CON (dep);
6748 c = ia64_safe_itanium_class (next);
6749 if ((c == ITANIUM_CLASS_ST
6750 || c == ITANIUM_CLASS_STF)
6751 && ia64_st_address_bypass_p (insn, next))
6753 has_mem_op_consumer_p = true;
6754 break;
6756 else if ((c == ITANIUM_CLASS_LD
6757 || c == ITANIUM_CLASS_FLD
6758 || c == ITANIUM_CLASS_FLDP)
6759 && ia64_ld_address_bypass_p (insn, next))
6761 has_mem_op_consumer_p = true;
6762 break;
6766 insn->call = has_mem_op_consumer_p;
6770 /* We're beginning a new block. Initialize data structures as necessary. */
6772 static void
6773 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6774 int sched_verbose ATTRIBUTE_UNUSED,
6775 int max_ready ATTRIBUTE_UNUSED)
6777 #ifdef ENABLE_CHECKING
6778 rtx insn;
6780 if (!sel_sched_p () && reload_completed)
6781 for (insn = NEXT_INSN (current_sched_info->prev_head);
6782 insn != current_sched_info->next_tail;
6783 insn = NEXT_INSN (insn))
6784 gcc_assert (!SCHED_GROUP_P (insn));
6785 #endif
6786 last_scheduled_insn = NULL_RTX;
6787 init_insn_group_barriers ();
6789 current_cycle = 0;
6790 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
6793 /* We're beginning a scheduling pass. Check assertion. */
6795 static void
6796 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6797 int sched_verbose ATTRIBUTE_UNUSED,
6798 int max_ready ATTRIBUTE_UNUSED)
6800 gcc_assert (pending_data_specs == 0);
6803 /* Scheduling pass is now finished. Free/reset static variable. */
6804 static void
6805 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6806 int sched_verbose ATTRIBUTE_UNUSED)
6808 gcc_assert (pending_data_specs == 0);
6811 /* Return TRUE if INSN is a load (either normal or speculative, but not a
6812 speculation check), FALSE otherwise. */
6813 static bool
6814 is_load_p (rtx insn)
6816 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6818 return
6819 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6820 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6823 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
6824 array (taking into account the 3-cycle cache reference postponing for
6825 stores; see the Intel Itanium 2 Reference Manual for Software Development
6826 and Optimization, section 6.7.3.1). */
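/* A worked example of the indexing below (illustrative): a load issued on
   cycle 5 is counted in mem_ops_in_group[5 % 4], i.e. slot 1, while a store
   issued on the same cycle lands in slot (5 + 3) % 4 == 0 and is therefore
   charged against the cycle-8 group, modelling the delayed cache access.  */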
6827 static void
6828 record_memory_reference (rtx insn)
6830 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6832 switch (insn_class) {
6833 case ITANIUM_CLASS_FLD:
6834 case ITANIUM_CLASS_LD:
6835 mem_ops_in_group[current_cycle % 4]++;
6836 break;
6837 case ITANIUM_CLASS_STF:
6838 case ITANIUM_CLASS_ST:
6839 mem_ops_in_group[(current_cycle + 3) % 4]++;
6840 break;
6841 default:;
6845 /* We are about to begin issuing insns for this clock cycle.
6846 Override the default sort algorithm to better slot instructions. */
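/* The reordering below relies on the scheduler issuing from the high end of
   the READY array, so moving an insn towards ready[0] deprioritizes it:
   asms are packed at the low end, and insns that would need a stop bit (or
   would exceed the memory-op budget) are shifted down while keeping their
   relative order.  */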
6848 static int
6849 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6850 int *pn_ready, int clock_var,
6851 int reorder_type)
6853 int n_asms;
6854 int n_ready = *pn_ready;
6855 rtx *e_ready = ready + n_ready;
6856 rtx *insnp;
6858 if (sched_verbose)
6859 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6861 if (reorder_type == 0)
6863 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6864 n_asms = 0;
6865 for (insnp = ready; insnp < e_ready; insnp++)
6866 if (insnp < e_ready)
6868 rtx insn = *insnp;
6869 enum attr_type t = ia64_safe_type (insn);
6870 if (t == TYPE_UNKNOWN)
6872 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6873 || asm_noperands (PATTERN (insn)) >= 0)
6875 rtx lowest = ready[n_asms];
6876 ready[n_asms] = insn;
6877 *insnp = lowest;
6878 n_asms++;
6880 else
6882 rtx highest = ready[n_ready - 1];
6883 ready[n_ready - 1] = insn;
6884 *insnp = highest;
6885 return 1;
6890 if (n_asms < n_ready)
6892 /* Some normal insns to process. Skip the asms. */
6893 ready += n_asms;
6894 n_ready -= n_asms;
6896 else if (n_ready > 0)
6897 return 1;
6900 if (ia64_final_schedule)
6902 int deleted = 0;
6903 int nr_need_stop = 0;
6905 for (insnp = ready; insnp < e_ready; insnp++)
6906 if (safe_group_barrier_needed (*insnp))
6907 nr_need_stop++;
6909 if (reorder_type == 1 && n_ready == nr_need_stop)
6910 return 0;
6911 if (reorder_type == 0)
6912 return 1;
6913 insnp = e_ready;
6914 /* Move down everything that needs a stop bit, preserving
6915 relative order. */
6916 while (insnp-- > ready + deleted)
6917 while (insnp >= ready + deleted)
6919 rtx insn = *insnp;
6920 if (! safe_group_barrier_needed (insn))
6921 break;
6922 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6923 *ready = insn;
6924 deleted++;
6926 n_ready -= deleted;
6927 ready += deleted;
6930 current_cycle = clock_var;
6931 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6933 int moved = 0;
6935 insnp = e_ready;
6936 /* Move down loads/stores, preserving relative order. */
6937 while (insnp-- > ready + moved)
6938 while (insnp >= ready + moved)
6940 rtx insn = *insnp;
6941 if (! is_load_p (insn))
6942 break;
6943 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6944 *ready = insn;
6945 moved++;
6947 n_ready -= moved;
6948 ready += moved;
6951 return 1;
6954 /* We are about to begin issuing insns for this clock cycle. Override
6955 the default sort algorithm to better slot instructions. */
6957 static int
6958 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6959 int clock_var)
6961 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6962 pn_ready, clock_var, 0);
6965 /* Like ia64_sched_reorder, but called after issuing each insn.
6966 Override the default sort algorithm to better slot instructions. */
6968 static int
6969 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6970 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6971 int *pn_ready, int clock_var)
6973 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6974 clock_var, 1);
6977 /* We are about to issue INSN. Return the number of insns left on the
6978 ready queue that can be issued this cycle. */
6980 static int
6981 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6982 int sched_verbose ATTRIBUTE_UNUSED,
6983 rtx insn ATTRIBUTE_UNUSED,
6984 int can_issue_more ATTRIBUTE_UNUSED)
6986 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
6987 /* Modulo scheduling does not extend h_i_d when emitting
6988 new instructions. Don't use h_i_d, if we don't have to. */
6990 if (DONE_SPEC (insn) & BEGIN_DATA)
6991 pending_data_specs++;
6992 if (CHECK_SPEC (insn) & BEGIN_DATA)
6993 pending_data_specs--;
6996 if (DEBUG_INSN_P (insn))
6997 return 1;
6999 last_scheduled_insn = insn;
7000 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7001 if (reload_completed)
7003 int needed = group_barrier_needed (insn);
7005 gcc_assert (!needed);
7006 if (GET_CODE (insn) == CALL_INSN)
7007 init_insn_group_barriers ();
7008 stops_p [INSN_UID (insn)] = stop_before_p;
7009 stop_before_p = 0;
7011 record_memory_reference (insn);
7013 return 1;
7016 /* We are choosing an insn from the ready queue. Return nonzero if INSN
7017 can be chosen. */
7019 static int
7020 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7022 gcc_assert (insn && INSN_P (insn));
7023 return ((!reload_completed
7024 || !safe_group_barrier_needed (insn))
7025 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7026 && (!mflag_sched_mem_insns_hard_limit
7027 || !is_load_p (insn)
7028 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7031 /* We are choosing an insn from the ready queue. Return nonzero if INSN
7032 can be chosen. */
7034 static bool
7035 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7037 gcc_assert (insn && INSN_P (insn));
7038 /* The size of the ALAT is 32. Since we perform conservative data
7039 speculation, we keep the ALAT half-empty. */
7040 return (pending_data_specs < 16
7041 || !(TODO_SPEC (insn) & BEGIN_DATA));
7044 /* The following variable value is a pseudo-insn used by the DFA insn
7045 scheduler to change the DFA state when the simulated clock is
7046 increased. */
7048 static rtx dfa_pre_cycle_insn;
7050 /* Returns 1 when a meaningful insn was scheduled between the last group
7051 barrier and LAST. */
7052 static int
7053 scheduled_good_insn (rtx last)
7055 if (last && recog_memoized (last) >= 0)
7056 return 1;
7058 for ( ;
7059 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7060 && !stops_p[INSN_UID (last)];
7061 last = PREV_INSN (last))
7062 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7063 the ebb we're scheduling. */
7064 if (INSN_P (last) && recog_memoized (last) >= 0)
7065 return 1;
7067 return 0;
7070 /* We are about to begin issuing INSN. Return nonzero if we cannot
7071 issue it on the given cycle CLOCK, and set *SORT_P to zero if the ready
7072 queue should not be sorted on the next clock start. */
7074 static int
7075 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7076 int clock, int *sort_p)
7078 int setup_clocks_p = FALSE;
7080 gcc_assert (insn && INSN_P (insn));
7082 if (DEBUG_INSN_P (insn))
7083 return 0;
7085 /* When a group barrier is needed for insn, last_scheduled_insn
7086 should be set. */
7087 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7088 || last_scheduled_insn);
7090 if ((reload_completed
7091 && (safe_group_barrier_needed (insn)
7092 || (mflag_sched_stop_bits_after_every_cycle
7093 && last_clock != clock
7094 && last_scheduled_insn
7095 && scheduled_good_insn (last_scheduled_insn))))
7096 || (last_scheduled_insn
7097 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7098 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7099 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7101 init_insn_group_barriers ();
7103 if (verbose && dump)
7104 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7105 last_clock == clock ? " + cycle advance" : "");
7107 stop_before_p = 1;
7108 current_cycle = clock;
7109 mem_ops_in_group[current_cycle % 4] = 0;
7111 if (last_clock == clock)
7113 state_transition (curr_state, dfa_stop_insn);
7114 if (TARGET_EARLY_STOP_BITS)
7115 *sort_p = (last_scheduled_insn == NULL_RTX
7116 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7117 else
7118 *sort_p = 0;
7119 return 1;
7121 else if (reload_completed)
7122 setup_clocks_p = TRUE;
7124 if (last_scheduled_insn)
7126 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7127 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7128 state_reset (curr_state);
7129 else
7131 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7132 state_transition (curr_state, dfa_stop_insn);
7133 state_transition (curr_state, dfa_pre_cycle_insn);
7134 state_transition (curr_state, NULL);
7138 else if (reload_completed)
7139 setup_clocks_p = TRUE;
7141 return 0;
7144 /* Implement targetm.sched.h_i_d_extended hook.
7145 Extend internal data structures. */
7146 static void
7147 ia64_h_i_d_extended (void)
7149 if (stops_p != NULL)
7151 int new_clocks_length = get_max_uid () * 3 / 2;
7152 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7153 clocks_length = new_clocks_length;
7158 /* This structure describes the data used by the backend to guide scheduling.
7159 When the current scheduling point is switched, this data should be saved
7160 and restored later, if the scheduler returns to this point. */
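/* This mirrors the global state above (prev_cycle_state, last_scheduled_insn,
   rws_sum, and so on); the selective scheduler saves and restores it through
   the alloc/init/set/clear/free hooks that follow.  */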
7161 struct _ia64_sched_context
7163 state_t prev_cycle_state;
7164 rtx last_scheduled_insn;
7165 struct reg_write_state rws_sum[NUM_REGS];
7166 struct reg_write_state rws_insn[NUM_REGS];
7167 int first_instruction;
7168 int pending_data_specs;
7169 int current_cycle;
7170 char mem_ops_in_group[4];
7172 typedef struct _ia64_sched_context *ia64_sched_context_t;
7174 /* Allocates a scheduling context. */
7175 static void *
7176 ia64_alloc_sched_context (void)
7178 return xmalloc (sizeof (struct _ia64_sched_context));
7181 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7182 the global context otherwise. */
7183 static void
7184 ia64_init_sched_context (void *_sc, bool clean_p)
7186 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7188 sc->prev_cycle_state = xmalloc (dfa_state_size);
7189 if (clean_p)
7191 state_reset (sc->prev_cycle_state);
7192 sc->last_scheduled_insn = NULL_RTX;
7193 memset (sc->rws_sum, 0, sizeof (rws_sum));
7194 memset (sc->rws_insn, 0, sizeof (rws_insn));
7195 sc->first_instruction = 1;
7196 sc->pending_data_specs = 0;
7197 sc->current_cycle = 0;
7198 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7200 else
7202 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7203 sc->last_scheduled_insn = last_scheduled_insn;
7204 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7205 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7206 sc->first_instruction = first_instruction;
7207 sc->pending_data_specs = pending_data_specs;
7208 sc->current_cycle = current_cycle;
7209 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7213 /* Sets the global scheduling context to the one pointed to by _SC. */
7214 static void
7215 ia64_set_sched_context (void *_sc)
7217 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7219 gcc_assert (sc != NULL);
7221 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7222 last_scheduled_insn = sc->last_scheduled_insn;
7223 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7224 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7225 first_instruction = sc->first_instruction;
7226 pending_data_specs = sc->pending_data_specs;
7227 current_cycle = sc->current_cycle;
7228 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7231 /* Clears the data in the _SC scheduling context. */
7232 static void
7233 ia64_clear_sched_context (void *_sc)
7235 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7237 free (sc->prev_cycle_state);
7238 sc->prev_cycle_state = NULL;
7241 /* Frees the _SC scheduling context. */
7242 static void
7243 ia64_free_sched_context (void *_sc)
7245 gcc_assert (_sc != NULL);
7247 free (_sc);
7250 typedef rtx (* gen_func_t) (rtx, rtx);
7252 /* Return a function that will generate a load of mode MODE_NO
7253 with speculation types TS. */
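/* Roughly, the tables below map speculation types onto the ia64 load forms:
   BEGIN_DATA alone yields an advanced load (ld.a), BEGIN_CONTROL alone a
   control-speculative load (ld.s, or a variant that may later become ld.sa),
   both together ld.sa, and an empty TS the ordinary load patterns.  */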
7254 static gen_func_t
7255 get_spec_load_gen_function (ds_t ts, int mode_no)
7257 static gen_func_t gen_ld_[] = {
7258 gen_movbi,
7259 gen_movqi_internal,
7260 gen_movhi_internal,
7261 gen_movsi_internal,
7262 gen_movdi_internal,
7263 gen_movsf_internal,
7264 gen_movdf_internal,
7265 gen_movxf_internal,
7266 gen_movti_internal,
7267 gen_zero_extendqidi2,
7268 gen_zero_extendhidi2,
7269 gen_zero_extendsidi2,
7272 static gen_func_t gen_ld_a[] = {
7273 gen_movbi_advanced,
7274 gen_movqi_advanced,
7275 gen_movhi_advanced,
7276 gen_movsi_advanced,
7277 gen_movdi_advanced,
7278 gen_movsf_advanced,
7279 gen_movdf_advanced,
7280 gen_movxf_advanced,
7281 gen_movti_advanced,
7282 gen_zero_extendqidi2_advanced,
7283 gen_zero_extendhidi2_advanced,
7284 gen_zero_extendsidi2_advanced,
7286 static gen_func_t gen_ld_s[] = {
7287 gen_movbi_speculative,
7288 gen_movqi_speculative,
7289 gen_movhi_speculative,
7290 gen_movsi_speculative,
7291 gen_movdi_speculative,
7292 gen_movsf_speculative,
7293 gen_movdf_speculative,
7294 gen_movxf_speculative,
7295 gen_movti_speculative,
7296 gen_zero_extendqidi2_speculative,
7297 gen_zero_extendhidi2_speculative,
7298 gen_zero_extendsidi2_speculative,
7300 static gen_func_t gen_ld_sa[] = {
7301 gen_movbi_speculative_advanced,
7302 gen_movqi_speculative_advanced,
7303 gen_movhi_speculative_advanced,
7304 gen_movsi_speculative_advanced,
7305 gen_movdi_speculative_advanced,
7306 gen_movsf_speculative_advanced,
7307 gen_movdf_speculative_advanced,
7308 gen_movxf_speculative_advanced,
7309 gen_movti_speculative_advanced,
7310 gen_zero_extendqidi2_speculative_advanced,
7311 gen_zero_extendhidi2_speculative_advanced,
7312 gen_zero_extendsidi2_speculative_advanced,
7314 static gen_func_t gen_ld_s_a[] = {
7315 gen_movbi_speculative_a,
7316 gen_movqi_speculative_a,
7317 gen_movhi_speculative_a,
7318 gen_movsi_speculative_a,
7319 gen_movdi_speculative_a,
7320 gen_movsf_speculative_a,
7321 gen_movdf_speculative_a,
7322 gen_movxf_speculative_a,
7323 gen_movti_speculative_a,
7324 gen_zero_extendqidi2_speculative_a,
7325 gen_zero_extendhidi2_speculative_a,
7326 gen_zero_extendsidi2_speculative_a,
7329 gen_func_t *gen_ld;
7331 if (ts & BEGIN_DATA)
7333 if (ts & BEGIN_CONTROL)
7334 gen_ld = gen_ld_sa;
7335 else
7336 gen_ld = gen_ld_a;
7338 else if (ts & BEGIN_CONTROL)
7340 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7341 || ia64_needs_block_p (ts))
7342 gen_ld = gen_ld_s;
7343 else
7344 gen_ld = gen_ld_s_a;
7346 else if (ts == 0)
7347 gen_ld = gen_ld_;
7348 else
7349 gcc_unreachable ();
7351 return gen_ld[mode_no];
7354 /* Constants that help map 'enum machine_mode' to int. */
7355 enum SPEC_MODES
7357 SPEC_MODE_INVALID = -1,
7358 SPEC_MODE_FIRST = 0,
7359 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7360 SPEC_MODE_FOR_EXTEND_LAST = 3,
7361 SPEC_MODE_LAST = 8
7364 enum
7366 /* Offset to reach ZERO_EXTEND patterns. */
7367 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
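  /* For example, ia64_mode_to_int (QImode) == 1, so an extending QImode load
     uses index 1 + SPEC_GEN_EXTEND_OFFSET == 9, which selects the
     gen_zero_extendqidi2* entries in the gen_ld_* tables above.  */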
7370 /* Return index of the MODE. */
7371 static int
7372 ia64_mode_to_int (enum machine_mode mode)
7374 switch (mode)
7376 case BImode: return 0; /* SPEC_MODE_FIRST */
7377 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7378 case HImode: return 2;
7379 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7380 case DImode: return 4;
7381 case SFmode: return 5;
7382 case DFmode: return 6;
7383 case XFmode: return 7;
7384 case TImode:
7385 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7386 mentioned in itanium[12].md. Predicate fp_register_operand also
7387 needs to be defined. Bottom line: better disable for now. */
7388 return SPEC_MODE_INVALID;
7389 default: return SPEC_MODE_INVALID;
7393 /* Provide information about speculation capabilities. */
7394 static void
7395 ia64_set_sched_flags (spec_info_t spec_info)
7397 unsigned int *flags = &(current_sched_info->flags);
7399 if (*flags & SCHED_RGN
7400 || *flags & SCHED_EBB
7401 || *flags & SEL_SCHED)
7403 int mask = 0;
7405 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7406 || (mflag_sched_ar_data_spec && reload_completed))
7408 mask |= BEGIN_DATA;
7410 if (!sel_sched_p ()
7411 && ((mflag_sched_br_in_data_spec && !reload_completed)
7412 || (mflag_sched_ar_in_data_spec && reload_completed)))
7413 mask |= BE_IN_DATA;
7416 if (mflag_sched_control_spec
7417 && (!sel_sched_p ()
7418 || reload_completed))
7420 mask |= BEGIN_CONTROL;
7422 if (!sel_sched_p () && mflag_sched_in_control_spec)
7423 mask |= BE_IN_CONTROL;
7426 spec_info->mask = mask;
7428 if (mask)
7430 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7432 if (mask & BE_IN_SPEC)
7433 *flags |= NEW_BBS;
7435 spec_info->flags = 0;
7437 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7438 spec_info->flags |= PREFER_NON_DATA_SPEC;
7440 if (mask & CONTROL_SPEC)
7442 if (mflag_sched_prefer_non_control_spec_insns)
7443 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7445 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7446 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7449 if (sched_verbose >= 1)
7450 spec_info->dump = sched_dump;
7451 else
7452 spec_info->dump = 0;
7454 if (mflag_sched_count_spec_in_critical_path)
7455 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7458 else
7459 spec_info->mask = 0;
7462 /* If INSN is an appropriate load, return the index of its mode
7463 (as computed by ia64_mode_to_int). Return -1 otherwise. */
7464 static int
7465 get_mode_no_for_insn (rtx insn)
7467 rtx reg, mem, mode_rtx;
7468 int mode_no;
7469 bool extend_p;
7471 extract_insn_cached (insn);
7473 /* We use WHICH_ALTERNATIVE only after reload. This will
7474 guarantee that reload won't touch a speculative insn. */
7476 if (recog_data.n_operands != 2)
7477 return -1;
7479 reg = recog_data.operand[0];
7480 mem = recog_data.operand[1];
7482 /* We should use MEM's mode since REG's mode in presence of
7483 ZERO_EXTEND will always be DImode. */
7484 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7485 /* Process non-speculative ld. */
7487 if (!reload_completed)
7489 /* Do not speculate into regs like ar.lc. */
7490 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7491 return -1;
7493 if (!MEM_P (mem))
7494 return -1;
7497 rtx mem_reg = XEXP (mem, 0);
7499 if (!REG_P (mem_reg))
7500 return -1;
7503 mode_rtx = mem;
7505 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7507 gcc_assert (REG_P (reg) && MEM_P (mem));
7508 mode_rtx = mem;
7510 else
7511 return -1;
7513 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7514 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7515 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7516 /* Process speculative ld or ld.c. */
7518 gcc_assert (REG_P (reg) && MEM_P (mem));
7519 mode_rtx = mem;
7521 else
7523 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7525 if (attr_class == ITANIUM_CLASS_CHK_A
7526 || attr_class == ITANIUM_CLASS_CHK_S_I
7527 || attr_class == ITANIUM_CLASS_CHK_S_F)
7528 /* Process chk. */
7529 mode_rtx = reg;
7530 else
7531 return -1;
7534 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7536 if (mode_no == SPEC_MODE_INVALID)
7537 return -1;
7539 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7541 if (extend_p)
7543 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7544 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7545 return -1;
7547 mode_no += SPEC_GEN_EXTEND_OFFSET;
7550 return mode_no;
7553 /* If X is an unspec part of a speculative load, return its code.
7554 Return -1 otherwise. */
7555 static int
7556 get_spec_unspec_code (const_rtx x)
7558 if (GET_CODE (x) != UNSPEC)
7559 return -1;
7562 int code;
7564 code = XINT (x, 1);
7566 switch (code)
7568 case UNSPEC_LDA:
7569 case UNSPEC_LDS:
7570 case UNSPEC_LDS_A:
7571 case UNSPEC_LDSA:
7572 return code;
7574 default:
7575 return -1;
7580 /* Implement skip_rtx_p hook. */
7581 static bool
7582 ia64_skip_rtx_p (const_rtx x)
7584 return get_spec_unspec_code (x) != -1;
7587 /* If INSN is a speculative load, return its UNSPEC code.
7588 Return -1 otherwise. */
7589 static int
7590 get_insn_spec_code (const_rtx insn)
7592 rtx pat, reg, mem;
7594 pat = PATTERN (insn);
7596 if (GET_CODE (pat) == COND_EXEC)
7597 pat = COND_EXEC_CODE (pat);
7599 if (GET_CODE (pat) != SET)
7600 return -1;
7602 reg = SET_DEST (pat);
7603 if (!REG_P (reg))
7604 return -1;
7606 mem = SET_SRC (pat);
7607 if (GET_CODE (mem) == ZERO_EXTEND)
7608 mem = XEXP (mem, 0);
7610 return get_spec_unspec_code (mem);
7613 /* If INSN is a speculative load, return a ds with the speculation types.
7614 Otherwise [if INSN is a normal instruction] return 0. */
7615 static ds_t
7616 ia64_get_insn_spec_ds (rtx insn)
7618 int code = get_insn_spec_code (insn);
7620 switch (code)
7622 case UNSPEC_LDA:
7623 return BEGIN_DATA;
7625 case UNSPEC_LDS:
7626 case UNSPEC_LDS_A:
7627 return BEGIN_CONTROL;
7629 case UNSPEC_LDSA:
7630 return BEGIN_DATA | BEGIN_CONTROL;
7632 default:
7633 return 0;
7637 /* If INSN is a speculative load return a ds with the speculation types that
7638 will be checked.
7639 Otherwise [if INSN is a normal instruction] return 0. */
7640 static ds_t
7641 ia64_get_insn_checked_ds (rtx insn)
7643 int code = get_insn_spec_code (insn);
7645 switch (code)
7647 case UNSPEC_LDA:
7648 return BEGIN_DATA | BEGIN_CONTROL;
7650 case UNSPEC_LDS:
7651 return BEGIN_CONTROL;
7653 case UNSPEC_LDS_A:
7654 case UNSPEC_LDSA:
7655 return BEGIN_DATA | BEGIN_CONTROL;
7657 default:
7658 return 0;
7662 /* Generate a speculative load pattern for INSN, given the speculation
7663 types TS and the machine mode index MODE_NO. The new pattern reuses
7664 INSN's operands (via recog_data) and preserves INSN's COND_EXEC
7665 wrapper, if any. */
7666 static rtx
7667 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7669 rtx pat, new_pat;
7670 gen_func_t gen_load;
7672 gen_load = get_spec_load_gen_function (ts, mode_no);
7674 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7675 copy_rtx (recog_data.operand[1]));
7677 pat = PATTERN (insn);
7678 if (GET_CODE (pat) == COND_EXEC)
7679 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7680 new_pat);
7682 return new_pat;
7685 static bool
7686 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7687 ds_t ds ATTRIBUTE_UNUSED)
7689 return false;
7692 /* Implement targetm.sched.speculate_insn hook.
7693 Check if the INSN can be TS speculative.
7694 If 'no' - return -1.
7695 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7696 If current pattern of the INSN already provides TS speculation,
7697 return 0. */
7698 static int
7699 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7701 int mode_no;
7702 int res;
7704 gcc_assert (!(ts & ~SPECULATIVE));
7706 if (ia64_spec_check_p (insn))
7707 return -1;
7709 if ((ts & BE_IN_SPEC)
7710 && !insn_can_be_in_speculative_p (insn, ts))
7711 return -1;
7713 mode_no = get_mode_no_for_insn (insn);
7715 if (mode_no != SPEC_MODE_INVALID)
7717 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7718 res = 0;
7719 else
7721 res = 1;
7722 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7725 else
7726 res = -1;
7728 return res;
7731 /* Return a function that will generate a check for speculation TS with mode
7732 MODE_NO.
7733 If simple check is needed, pass true for SIMPLE_CHECK_P.
7734 If clearing check is needed, pass true for CLEARING_CHECK_P. */
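/* The forms generated below correspond to the ia64 check instructions:
   ld.c.clr / ld.c.nc re-execute the load, with .clr also clearing the
   matching ALAT entry; chk.a.clr / chk.a.nc branch to recovery code when
   the ALAT entry has been lost; and chk.s branches to recovery when the
   NaT bit of the speculative result is set.  */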
7735 static gen_func_t
7736 get_spec_check_gen_function (ds_t ts, int mode_no,
7737 bool simple_check_p, bool clearing_check_p)
7739 static gen_func_t gen_ld_c_clr[] = {
7740 gen_movbi_clr,
7741 gen_movqi_clr,
7742 gen_movhi_clr,
7743 gen_movsi_clr,
7744 gen_movdi_clr,
7745 gen_movsf_clr,
7746 gen_movdf_clr,
7747 gen_movxf_clr,
7748 gen_movti_clr,
7749 gen_zero_extendqidi2_clr,
7750 gen_zero_extendhidi2_clr,
7751 gen_zero_extendsidi2_clr,
7753 static gen_func_t gen_ld_c_nc[] = {
7754 gen_movbi_nc,
7755 gen_movqi_nc,
7756 gen_movhi_nc,
7757 gen_movsi_nc,
7758 gen_movdi_nc,
7759 gen_movsf_nc,
7760 gen_movdf_nc,
7761 gen_movxf_nc,
7762 gen_movti_nc,
7763 gen_zero_extendqidi2_nc,
7764 gen_zero_extendhidi2_nc,
7765 gen_zero_extendsidi2_nc,
7767 static gen_func_t gen_chk_a_clr[] = {
7768 gen_advanced_load_check_clr_bi,
7769 gen_advanced_load_check_clr_qi,
7770 gen_advanced_load_check_clr_hi,
7771 gen_advanced_load_check_clr_si,
7772 gen_advanced_load_check_clr_di,
7773 gen_advanced_load_check_clr_sf,
7774 gen_advanced_load_check_clr_df,
7775 gen_advanced_load_check_clr_xf,
7776 gen_advanced_load_check_clr_ti,
7777 gen_advanced_load_check_clr_di,
7778 gen_advanced_load_check_clr_di,
7779 gen_advanced_load_check_clr_di,
7781 static gen_func_t gen_chk_a_nc[] = {
7782 gen_advanced_load_check_nc_bi,
7783 gen_advanced_load_check_nc_qi,
7784 gen_advanced_load_check_nc_hi,
7785 gen_advanced_load_check_nc_si,
7786 gen_advanced_load_check_nc_di,
7787 gen_advanced_load_check_nc_sf,
7788 gen_advanced_load_check_nc_df,
7789 gen_advanced_load_check_nc_xf,
7790 gen_advanced_load_check_nc_ti,
7791 gen_advanced_load_check_nc_di,
7792 gen_advanced_load_check_nc_di,
7793 gen_advanced_load_check_nc_di,
7795 static gen_func_t gen_chk_s[] = {
7796 gen_speculation_check_bi,
7797 gen_speculation_check_qi,
7798 gen_speculation_check_hi,
7799 gen_speculation_check_si,
7800 gen_speculation_check_di,
7801 gen_speculation_check_sf,
7802 gen_speculation_check_df,
7803 gen_speculation_check_xf,
7804 gen_speculation_check_ti,
7805 gen_speculation_check_di,
7806 gen_speculation_check_di,
7807 gen_speculation_check_di,
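/* Each table above is indexed by MODE_NO, apparently in the same order
   used by the speculative load generators: BI, QI, HI, SI, DI, SF, DF,
   XF, TI, followed by three entries for the zero-extending loads.  The
   chk.a and chk.s tables reuse the DImode generator for the extending
   variants, presumably because the check only needs the DImode
   destination register. */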
7810 gen_func_t *gen_check;
7812 if (ts & BEGIN_DATA)
7814 /* We don't need recovery because even if this is ld.sa, an ALAT
7815 entry will be allocated only if the NAT bit is set to zero.
7816 So it is enough to use ld.c here. */
7818 if (simple_check_p)
7820 gcc_assert (mflag_sched_spec_ldc);
7822 if (clearing_check_p)
7823 gen_check = gen_ld_c_clr;
7824 else
7825 gen_check = gen_ld_c_nc;
7827 else
7829 if (clearing_check_p)
7830 gen_check = gen_chk_a_clr;
7831 else
7832 gen_check = gen_chk_a_nc;
7835 else if (ts & BEGIN_CONTROL)
7837 if (simple_check_p)
7838 /* We might want to use ld.sa -> ld.c instead of
7839 ld.s -> chk.s. */
7841 gcc_assert (!ia64_needs_block_p (ts));
7843 if (clearing_check_p)
7844 gen_check = gen_ld_c_clr;
7845 else
7846 gen_check = gen_ld_c_nc;
7848 else
7850 gen_check = gen_chk_s;
7853 else
7854 gcc_unreachable ();
7856 gcc_assert (mode_no >= 0);
7857 return gen_check[mode_no];
7860 /* Return nonzero if speculation TS needs a branchy recovery check. */
7861 static bool
7862 ia64_needs_block_p (ds_t ts)
7864 if (ts & BEGIN_DATA)
7865 return !mflag_sched_spec_ldc;
7867 gcc_assert ((ts & BEGIN_CONTROL) != 0);
7869 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7872 /* Generate a recovery check pattern for INSN with speculation DS.
7873 If LABEL is nonzero, generate a branchy recovery check that branches
7874 to LABEL; otherwise, generate a simple check. */
7875 static rtx
7876 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7878 rtx op1, pat, check_pat;
7879 gen_func_t gen_check;
7880 int mode_no;
7882 mode_no = get_mode_no_for_insn (insn);
7883 gcc_assert (mode_no >= 0);
7885 if (label)
7886 op1 = label;
7887 else
7889 gcc_assert (!ia64_needs_block_p (ds));
7890 op1 = copy_rtx (recog_data.operand[1]);
7893 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7894 true);
7896 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
7898 pat = PATTERN (insn);
7899 if (GET_CODE (pat) == COND_EXEC)
7900 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7901 check_pat);
7903 return check_pat;
7906 /* Return nonzero if X is a speculation recovery check. */
7907 static int
7908 ia64_spec_check_p (rtx x)
7910 x = PATTERN (x);
7911 if (GET_CODE (x) == COND_EXEC)
7912 x = COND_EXEC_CODE (x);
7913 if (GET_CODE (x) == SET)
7914 return ia64_spec_check_src_p (SET_SRC (x));
7915 return 0;
7918 /* Return nonzero if SRC is the source pattern of a recovery check. */
7919 static int
7920 ia64_spec_check_src_p (rtx src)
7922 if (GET_CODE (src) == IF_THEN_ELSE)
7924 rtx t;
7926 t = XEXP (src, 0);
7927 if (GET_CODE (t) == NE)
7929 t = XEXP (t, 0);
7931 if (GET_CODE (t) == UNSPEC)
7933 int code;
7935 code = XINT (t, 1);
7937 if (code == UNSPEC_LDCCLR
7938 || code == UNSPEC_LDCNC
7939 || code == UNSPEC_CHKACLR
7940 || code == UNSPEC_CHKANC
7941 || code == UNSPEC_CHKS)
7943 gcc_assert (code != 0);
7944 return code;
7949 return 0;
7953 /* The following page contains abstract data `bundle states' which are
7954 used for bundling insns (inserting nops and template generation). */
7956 /* The following describes state of insn bundling. */
7958 struct bundle_state
7960 /* Unique bundle state number to identify them in the debugging
7961 output */
7962 int unique_num;
7963 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7964 /* number nops before and after the insn */
7965 short before_nops_num, after_nops_num;
7966 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
7967 insn, and so on) */
7968 int cost; /* cost of the state in cycles */
7969 int accumulated_insns_num; /* number of all previous insns including
7970 nops; an L-type insn is counted as 2 insns */
7971 int branch_deviation; /* deviation of previous branches from 3rd slots */
7972 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
7973 struct bundle_state *next; /* next state with the same insn_num */
7974 struct bundle_state *originator; /* originator (previous insn state) */
7975 /* All bundle states are in the following chain. */
7976 struct bundle_state *allocated_states_chain;
7977 /* The DFA State after issuing the insn and the nops. */
7978 state_t dfa_state;
7981 /* The following maps an insn number to the corresponding bundle state. */
7983 static struct bundle_state **index_to_bundle_states;
7985 /* The unique number of next bundle state. */
7987 static int bundle_states_num;
7989 /* All allocated bundle states are in the following chain. */
7991 static struct bundle_state *allocated_bundle_states_chain;
7993 /* All allocated but not used bundle states are in the following
7994 chain. */
7996 static struct bundle_state *free_bundle_state_chain;
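/* Note that bundle states are never released individually:
   free_bundle_state only pushes a state onto the free chain for reuse,
   and all memory is reclaimed at once by finish_bundle_states via the
   allocated_states_chain links. */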
7999 /* The following function returns a free bundle state. */
8001 static struct bundle_state *
8002 get_free_bundle_state (void)
8004 struct bundle_state *result;
8006 if (free_bundle_state_chain != NULL)
8008 result = free_bundle_state_chain;
8009 free_bundle_state_chain = result->next;
8011 else
8013 result = XNEW (struct bundle_state);
8014 result->dfa_state = xmalloc (dfa_state_size);
8015 result->allocated_states_chain = allocated_bundle_states_chain;
8016 allocated_bundle_states_chain = result;
8018 result->unique_num = bundle_states_num++;
8019 return result;
8023 /* The following function frees given bundle state. */
8025 static void
8026 free_bundle_state (struct bundle_state *state)
8028 state->next = free_bundle_state_chain;
8029 free_bundle_state_chain = state;
8032 /* Start work with abstract data `bundle states'. */
8034 static void
8035 initiate_bundle_states (void)
8037 bundle_states_num = 0;
8038 free_bundle_state_chain = NULL;
8039 allocated_bundle_states_chain = NULL;
8042 /* Finish work with abstract data `bundle states'. */
8044 static void
8045 finish_bundle_states (void)
8047 struct bundle_state *curr_state, *next_state;
8049 for (curr_state = allocated_bundle_states_chain;
8050 curr_state != NULL;
8051 curr_state = next_state)
8053 next_state = curr_state->allocated_states_chain;
8054 free (curr_state->dfa_state);
8055 free (curr_state);
8059 /* Hash table of the bundle states. The key is dfa_state and insn_num
8060 of the bundle states. */
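/* Including insn_num in the key guarantees that states reached at
   different positions in the insn stream are never merged, even when
   their DFA states happen to coincide. */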
8062 static htab_t bundle_state_table;
8064 /* The function returns hash of BUNDLE_STATE. */
8066 static unsigned
8067 bundle_state_hash (const void *bundle_state)
8069 const struct bundle_state *const state
8070 = (const struct bundle_state *) bundle_state;
8071 unsigned result, i;
8073 for (result = i = 0; i < dfa_state_size; i++)
8074 result += (((unsigned char *) state->dfa_state) [i]
8075 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8076 return result + state->insn_num;
8079 /* The function returns nonzero if the bundle state keys are equal. */
8081 static int
8082 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8084 const struct bundle_state *const state1
8085 = (const struct bundle_state *) bundle_state_1;
8086 const struct bundle_state *const state2
8087 = (const struct bundle_state *) bundle_state_2;
8089 return (state1->insn_num == state2->insn_num
8090 && memcmp (state1->dfa_state, state2->dfa_state,
8091 dfa_state_size) == 0);
8094 /* The function inserts the BUNDLE_STATE into the hash table. The
8095 function returns nonzero if the bundle has been inserted into the
8096 table. The table contains the best bundle state with given key. */
8098 static int
8099 insert_bundle_state (struct bundle_state *bundle_state)
8101 void **entry_ptr;
8103 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8104 if (*entry_ptr == NULL)
8106 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8107 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8108 *entry_ptr = (void *) bundle_state;
8109 return TRUE;
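/* The chain of comparisons below implements a lexicographic order on
   (cost, accumulated_insns_num, branch_deviation, middle_bundle_stops);
   a strictly better state replaces the one already in the table while
   keeping its position in the per-insn list. */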
8111 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8112 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8113 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8114 > bundle_state->accumulated_insns_num
8115 || (((struct bundle_state *)
8116 *entry_ptr)->accumulated_insns_num
8117 == bundle_state->accumulated_insns_num
8118 && (((struct bundle_state *)
8119 *entry_ptr)->branch_deviation
8120 > bundle_state->branch_deviation
8121 || (((struct bundle_state *)
8122 *entry_ptr)->branch_deviation
8123 == bundle_state->branch_deviation
8124 && ((struct bundle_state *)
8125 *entry_ptr)->middle_bundle_stops
8126 > bundle_state->middle_bundle_stops))))))
8129 struct bundle_state temp;
8131 temp = *(struct bundle_state *) *entry_ptr;
8132 *(struct bundle_state *) *entry_ptr = *bundle_state;
8133 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8134 *bundle_state = temp;
8136 return FALSE;
8139 /* Start work with the hash table. */
8141 static void
8142 initiate_bundle_state_table (void)
8144 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8145 (htab_del) 0);
8148 /* Finish work with the hash table. */
8150 static void
8151 finish_bundle_state_table (void)
8153 htab_delete (bundle_state_table);
8158 /* The following variable is an insn `nop' used to check bundle states
8159 with different numbers of inserted nops. */
8161 static rtx ia64_nop;
8163 /* The following function tries to issue NOPS_NUM nops for the current
8164 state without advancing the processor cycle.  If that fails, the
8165 function returns FALSE and frees the current state. */
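/* (state_transition returns a negative value when the insn can be issued
   in the current cycle, so a non-negative result below means a cycle
   advance would be needed and the state is abandoned.) */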
8167 static int
8168 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8170 int i;
8172 for (i = 0; i < nops_num; i++)
8173 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8175 free_bundle_state (curr_state);
8176 return FALSE;
8178 return TRUE;
8181 /* The following function tries to issue INSN for the current
8182 state without advancing the processor cycle.  If that fails, the
8183 function returns FALSE and frees the current state. */
8185 static int
8186 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8188 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8190 free_bundle_state (curr_state);
8191 return FALSE;
8193 return TRUE;
8196 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN,
8197 starting from ORIGINATOR, without advancing the processor cycle.  If
8198 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8199 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8200 If it is successful, the function creates a new bundle state and inserts
8201 it into the hash table and into `index_to_bundle_states'. */
8203 static void
8204 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8205 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8207 struct bundle_state *curr_state;
8209 curr_state = get_free_bundle_state ();
8210 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8211 curr_state->insn = insn;
8212 curr_state->insn_num = originator->insn_num + 1;
8213 curr_state->cost = originator->cost;
8214 curr_state->originator = originator;
8215 curr_state->before_nops_num = before_nops_num;
8216 curr_state->after_nops_num = 0;
8217 curr_state->accumulated_insns_num
8218 = originator->accumulated_insns_num + before_nops_num;
8219 curr_state->branch_deviation = originator->branch_deviation;
8220 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8221 gcc_assert (insn);
8222 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8224 gcc_assert (GET_MODE (insn) != TImode);
8225 if (!try_issue_nops (curr_state, before_nops_num))
8226 return;
8227 if (!try_issue_insn (curr_state, insn))
8228 return;
8229 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8230 if (curr_state->accumulated_insns_num % 3 != 0)
8231 curr_state->middle_bundle_stops++;
8232 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8233 && curr_state->accumulated_insns_num % 3 != 0)
8235 free_bundle_state (curr_state);
8236 return;
8239 else if (GET_MODE (insn) != TImode)
8241 if (!try_issue_nops (curr_state, before_nops_num))
8242 return;
8243 if (!try_issue_insn (curr_state, insn))
8244 return;
8245 curr_state->accumulated_insns_num++;
8246 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8247 && asm_noperands (PATTERN (insn)) < 0);
8249 if (ia64_safe_type (insn) == TYPE_L)
8250 curr_state->accumulated_insns_num++;
8252 else
8254 /* If this is an insn that must be first in a group, then don't allow
8255 nops to be emitted before it. Currently, alloc is the only such
8256 supported instruction. */
8257 /* ??? The bundling automatons should handle this for us, but they do
8258 not yet have support for the first_insn attribute. */
8259 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8261 free_bundle_state (curr_state);
8262 return;
8265 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8266 state_transition (curr_state->dfa_state, NULL);
8267 curr_state->cost++;
8268 if (!try_issue_nops (curr_state, before_nops_num))
8269 return;
8270 if (!try_issue_insn (curr_state, insn))
8271 return;
8272 curr_state->accumulated_insns_num++;
8273 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8274 || asm_noperands (PATTERN (insn)) >= 0)
8276 /* Finish bundle containing asm insn. */
8277 curr_state->after_nops_num
8278 = 3 - curr_state->accumulated_insns_num % 3;
8279 curr_state->accumulated_insns_num
8280 += 3 - curr_state->accumulated_insns_num % 3;
8282 else if (ia64_safe_type (insn) == TYPE_L)
8283 curr_state->accumulated_insns_num++;
8285 if (ia64_safe_type (insn) == TYPE_B)
8286 curr_state->branch_deviation
8287 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8288 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8290 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8292 state_t dfa_state;
8293 struct bundle_state *curr_state1;
8294 struct bundle_state *allocated_states_chain;
8296 curr_state1 = get_free_bundle_state ();
8297 dfa_state = curr_state1->dfa_state;
8298 allocated_states_chain = curr_state1->allocated_states_chain;
8299 *curr_state1 = *curr_state;
8300 curr_state1->dfa_state = dfa_state;
8301 curr_state1->allocated_states_chain = allocated_states_chain;
8302 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8303 dfa_state_size);
8304 curr_state = curr_state1;
8306 if (!try_issue_nops (curr_state,
8307 3 - curr_state->accumulated_insns_num % 3))
8308 return;
8309 curr_state->after_nops_num
8310 = 3 - curr_state->accumulated_insns_num % 3;
8311 curr_state->accumulated_insns_num
8312 += 3 - curr_state->accumulated_insns_num % 3;
8314 if (!insert_bundle_state (curr_state))
8315 free_bundle_state (curr_state);
8316 return;
8319 /* The following function returns the position within the two-bundle
8320 window for the given STATE. */
8322 static int
8323 get_max_pos (state_t state)
8325 if (cpu_unit_reservation_p (state, pos_6))
8326 return 6;
8327 else if (cpu_unit_reservation_p (state, pos_5))
8328 return 5;
8329 else if (cpu_unit_reservation_p (state, pos_4))
8330 return 4;
8331 else if (cpu_unit_reservation_p (state, pos_3))
8332 return 3;
8333 else if (cpu_unit_reservation_p (state, pos_2))
8334 return 2;
8335 else if (cpu_unit_reservation_p (state, pos_1))
8336 return 1;
8337 else
8338 return 0;
8341 /* The function returns the code of a possible template for the given
8342 position and state.  It should be called only with position equal
8343 to 3 or 6.  We avoid generating F NOPs by putting templates
8344 containing F insns at the end of the template search, because an
8345 undocumented anomaly in McKinley-derived cores can cause stalls if
8346 an F-unit insn (including a NOP) is issued within a six-cycle window
8347 after reading certain application registers (such as ar.bsp).
8348 Furthermore, power considerations also argue against the use of
8349 F-unit instructions unless they are really needed. */
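/* Judging from the unit names tested below, the returned code is the
   bundle template number later passed to gen_bundle_selector:
   0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb, 5 = .mbb,
   6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx. */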
8351 static int
8352 get_template (state_t state, int pos)
8354 switch (pos)
8356 case 3:
8357 if (cpu_unit_reservation_p (state, _0mmi_))
8358 return 1;
8359 else if (cpu_unit_reservation_p (state, _0mii_))
8360 return 0;
8361 else if (cpu_unit_reservation_p (state, _0mmb_))
8362 return 7;
8363 else if (cpu_unit_reservation_p (state, _0mib_))
8364 return 6;
8365 else if (cpu_unit_reservation_p (state, _0mbb_))
8366 return 5;
8367 else if (cpu_unit_reservation_p (state, _0bbb_))
8368 return 4;
8369 else if (cpu_unit_reservation_p (state, _0mmf_))
8370 return 3;
8371 else if (cpu_unit_reservation_p (state, _0mfi_))
8372 return 2;
8373 else if (cpu_unit_reservation_p (state, _0mfb_))
8374 return 8;
8375 else if (cpu_unit_reservation_p (state, _0mlx_))
8376 return 9;
8377 else
8378 gcc_unreachable ();
8379 case 6:
8380 if (cpu_unit_reservation_p (state, _1mmi_))
8381 return 1;
8382 else if (cpu_unit_reservation_p (state, _1mii_))
8383 return 0;
8384 else if (cpu_unit_reservation_p (state, _1mmb_))
8385 return 7;
8386 else if (cpu_unit_reservation_p (state, _1mib_))
8387 return 6;
8388 else if (cpu_unit_reservation_p (state, _1mbb_))
8389 return 5;
8390 else if (cpu_unit_reservation_p (state, _1bbb_))
8391 return 4;
8392 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8393 return 3;
8394 else if (cpu_unit_reservation_p (state, _1mfi_))
8395 return 2;
8396 else if (cpu_unit_reservation_p (state, _1mfb_))
8397 return 8;
8398 else if (cpu_unit_reservation_p (state, _1mlx_))
8399 return 9;
8400 else
8401 gcc_unreachable ();
8402 default:
8403 gcc_unreachable ();
8407 /* True when INSN is important for bundling. */
8408 static bool
8409 important_for_bundling_p (rtx insn)
8411 return (INSN_P (insn)
8412 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8413 && GET_CODE (PATTERN (insn)) != USE
8414 && GET_CODE (PATTERN (insn)) != CLOBBER);
8417 /* The following function returns the first insn important for insn
8418 bundling at or after INSN and before TAIL, or NULL_RTX if there is none. */
8420 static rtx
8421 get_next_important_insn (rtx insn, rtx tail)
8423 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8424 if (important_for_bundling_p (insn))
8425 return insn;
8426 return NULL_RTX;
8429 /* Add a bundle selector TEMPLATE0 before INSN. */
8431 static void
8432 ia64_add_bundle_selector_before (int template0, rtx insn)
8434 rtx b = gen_bundle_selector (GEN_INT (template0));
8436 ia64_emit_insn_before (b, insn);
8437 #if NR_BUNDLES == 10
8438 if ((template0 == 4 || template0 == 5)
8439 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8441 int i;
8442 rtx note = NULL_RTX;
8444 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8445 first or second slot.  If it is and has a REG_EH_REGION note, copy
8446 the note to the following nops, as br.call sets rp to the address of
8447 the following bundle and therefore an EH region end must be on a
8448 bundle boundary. */
8449 insn = PREV_INSN (insn);
8450 for (i = 0; i < 3; i++)
8453 insn = next_active_insn (insn);
8454 while (GET_CODE (insn) == INSN
8455 && get_attr_empty (insn) == EMPTY_YES);
8456 if (GET_CODE (insn) == CALL_INSN)
8457 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8458 else if (note)
8460 int code;
8462 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8463 || code == CODE_FOR_nop_b);
8464 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8465 note = NULL_RTX;
8466 else
8467 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8471 #endif
8474 /* The following function does insn bundling. Bundling means
8475 inserting templates and nop insns to fit insn groups into permitted
8476 templates.  Instruction scheduling uses an NDFA (non-deterministic
8477 finite automaton) encoding information about the templates and the
8478 inserted nops.  The nondeterminism of the automaton permits following
8479 all possible insn sequences very quickly.
8481 Unfortunately it is not possible to get information about the inserted
8482 nop insns and the templates used from the automaton states.  The
8483 automaton only says that we can issue an insn, possibly inserting
8484 some nops before it and using some template.  Therefore insn
8485 bundling in this function is implemented by using a DFA
8486 (deterministic finite automaton).  We follow all possible insn
8487 sequences by inserting 0-2 nops (that is what the NDFA describes for
8488 insn scheduling) before/after each insn being bundled.  We know the
8489 start of a simulated processor cycle from insn scheduling (an insn
8490 starting a new cycle has TImode).
8492 A simple implementation of insn bundling would create an enormous
8493 number of possible insn sequences satisfying the information about new
8494 cycle ticks taken from the insn scheduling.  To make the algorithm
8495 practical we use dynamic programming.  Each decision (about
8496 inserting nops and implicitly about previous decisions) is described
8497 by the structure bundle_state (see above).  If we generate the same
8498 bundle state (the key is the automaton state after issuing the insns
8499 and nops for it), we reuse the already generated one.  As a consequence
8500 we reject some decisions which cannot improve the solution and
8501 reduce the memory needed by the algorithm.
8503 When we reach the end of EBB (extended basic block), we choose the
8504 best sequence and then, moving back in EBB, insert templates for
8505 the best alternative. The templates are taken from querying
8506 automaton state for each insn in chosen bundle states.
8508 So the algorithm makes two (forward and backward) passes through
8509 EBB. */
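/* A purely illustrative sketch (hypothetical, not taken from real
   output): for an EBB whose scheduled insns are  m0 m1 i0 ;; m2 f0
   the forward pass enumerates states such as "issue m2 with 0, 1 or 2
   nops before it", and the best complete state recovered by the
   backward pass could correspond to emitting

       .mmi   m0  m1  i0   ;;
       .mfi   m2  f0  nop.i

   i.e. one bundle selector per bundle plus a trailing nop that fills
   the last slot. */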
8511 static void
8512 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8514 struct bundle_state *curr_state, *next_state, *best_state;
8515 rtx insn, next_insn;
8516 int insn_num;
8517 int i, bundle_end_p, only_bundle_end_p, asm_p;
8518 int pos = 0, max_pos, template0, template1;
8519 rtx b;
8520 rtx nop;
8521 enum attr_type type;
8523 insn_num = 0;
8524 /* Count insns in the EBB. */
8525 for (insn = NEXT_INSN (prev_head_insn);
8526 insn && insn != tail;
8527 insn = NEXT_INSN (insn))
8528 if (INSN_P (insn))
8529 insn_num++;
8530 if (insn_num == 0)
8531 return;
8532 bundling_p = 1;
8533 dfa_clean_insn_cache ();
8534 initiate_bundle_state_table ();
8535 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8536 /* First (forward) pass -- generation of bundle states. */
8537 curr_state = get_free_bundle_state ();
8538 curr_state->insn = NULL;
8539 curr_state->before_nops_num = 0;
8540 curr_state->after_nops_num = 0;
8541 curr_state->insn_num = 0;
8542 curr_state->cost = 0;
8543 curr_state->accumulated_insns_num = 0;
8544 curr_state->branch_deviation = 0;
8545 curr_state->middle_bundle_stops = 0;
8546 curr_state->next = NULL;
8547 curr_state->originator = NULL;
8548 state_reset (curr_state->dfa_state);
8549 index_to_bundle_states [0] = curr_state;
8550 insn_num = 0;
8551 /* Shift cycle mark if it is put on insn which could be ignored. */
8552 for (insn = NEXT_INSN (prev_head_insn);
8553 insn != tail;
8554 insn = NEXT_INSN (insn))
8555 if (INSN_P (insn)
8556 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8557 || GET_CODE (PATTERN (insn)) == USE
8558 || GET_CODE (PATTERN (insn)) == CLOBBER)
8559 && GET_MODE (insn) == TImode)
8561 PUT_MODE (insn, VOIDmode);
8562 for (next_insn = NEXT_INSN (insn);
8563 next_insn != tail;
8564 next_insn = NEXT_INSN (next_insn))
8565 if (INSN_P (next_insn)
8566 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8567 && GET_CODE (PATTERN (next_insn)) != USE
8568 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8569 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8571 PUT_MODE (next_insn, TImode);
8572 break;
8575 /* Forward pass: generation of bundle states. */
8576 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8577 insn != NULL_RTX;
8578 insn = next_insn)
8580 gcc_assert (INSN_P (insn)
8581 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8582 && GET_CODE (PATTERN (insn)) != USE
8583 && GET_CODE (PATTERN (insn)) != CLOBBER);
8584 type = ia64_safe_type (insn);
8585 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8586 insn_num++;
8587 index_to_bundle_states [insn_num] = NULL;
8588 for (curr_state = index_to_bundle_states [insn_num - 1];
8589 curr_state != NULL;
8590 curr_state = next_state)
8592 pos = curr_state->accumulated_insns_num % 3;
8593 next_state = curr_state->next;
8594 /* We must fill up the current bundle in order to start a
8595 subsequent asm insn in a new bundle. Asm insn is always
8596 placed in a separate bundle. */
8597 only_bundle_end_p
8598 = (next_insn != NULL_RTX
8599 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8600 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8601 /* We may fill up the current bundle if it is the cycle end
8602 without a group barrier. */
8603 bundle_end_p
8604 = (only_bundle_end_p || next_insn == NULL_RTX
8605 || (GET_MODE (next_insn) == TImode
8606 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8607 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8608 || type == TYPE_S)
8609 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8610 only_bundle_end_p);
8611 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8612 only_bundle_end_p);
8613 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8614 only_bundle_end_p);
8616 gcc_assert (index_to_bundle_states [insn_num]);
8617 for (curr_state = index_to_bundle_states [insn_num];
8618 curr_state != NULL;
8619 curr_state = curr_state->next)
8620 if (verbose >= 2 && dump)
8622 /* This structure is taken from generated code of the
8623 pipeline hazard recognizer (see file insn-attrtab.c).
8624 Please don't forget to change the structure if a new
8625 automaton is added to .md file. */
8626 struct DFA_chip
8628 unsigned short one_automaton_state;
8629 unsigned short oneb_automaton_state;
8630 unsigned short two_automaton_state;
8631 unsigned short twob_automaton_state;
8634 fprintf
8635 (dump,
8636 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8637 curr_state->unique_num,
8638 (curr_state->originator == NULL
8639 ? -1 : curr_state->originator->unique_num),
8640 curr_state->cost,
8641 curr_state->before_nops_num, curr_state->after_nops_num,
8642 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8643 curr_state->middle_bundle_stops,
8644 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8645 INSN_UID (insn));
8649 /* We should find a solution because the 2nd insn scheduling has
8650 found one. */
8651 gcc_assert (index_to_bundle_states [insn_num]);
8652 /* Find a state corresponding to the best insn sequence. */
8653 best_state = NULL;
8654 for (curr_state = index_to_bundle_states [insn_num];
8655 curr_state != NULL;
8656 curr_state = curr_state->next)
8657 /* We are looking only at the states with a fully filled-up last
8658 bundle.  Among them we prefer insn sequences with minimal cost,
8659 then with minimal inserted nops, and finally with branch insns
8660 placed in the 3rd slots. */
8661 if (curr_state->accumulated_insns_num % 3 == 0
8662 && (best_state == NULL || best_state->cost > curr_state->cost
8663 || (best_state->cost == curr_state->cost
8664 && (curr_state->accumulated_insns_num
8665 < best_state->accumulated_insns_num
8666 || (curr_state->accumulated_insns_num
8667 == best_state->accumulated_insns_num
8668 && (curr_state->branch_deviation
8669 < best_state->branch_deviation
8670 || (curr_state->branch_deviation
8671 == best_state->branch_deviation
8672 && curr_state->middle_bundle_stops
8673 < best_state->middle_bundle_stops)))))))
8674 best_state = curr_state;
8675 /* Second (backward) pass: adding nops and templates. */
8676 gcc_assert (best_state);
8677 insn_num = best_state->before_nops_num;
8678 template0 = template1 = -1;
8679 for (curr_state = best_state;
8680 curr_state->originator != NULL;
8681 curr_state = curr_state->originator)
8683 insn = curr_state->insn;
8684 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8685 || asm_noperands (PATTERN (insn)) >= 0);
8686 insn_num++;
8687 if (verbose >= 2 && dump)
8689 struct DFA_chip
8691 unsigned short one_automaton_state;
8692 unsigned short oneb_automaton_state;
8693 unsigned short two_automaton_state;
8694 unsigned short twob_automaton_state;
8697 fprintf
8698 (dump,
8699 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8700 curr_state->unique_num,
8701 (curr_state->originator == NULL
8702 ? -1 : curr_state->originator->unique_num),
8703 curr_state->cost,
8704 curr_state->before_nops_num, curr_state->after_nops_num,
8705 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8706 curr_state->middle_bundle_stops,
8707 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8708 INSN_UID (insn));
8710 /* Find the position in the current bundle window.  The window can
8711 contain at most two bundles.  A two-bundle window means that
8712 the processor will make two bundle rotations. */
8713 max_pos = get_max_pos (curr_state->dfa_state);
8714 if (max_pos == 6
8715 /* The following (negative template number) means that the
8716 processor did one bundle rotation. */
8717 || (max_pos == 3 && template0 < 0))
8719 /* We are at the end of the window -- find template(s) for
8720 its bundle(s). */
8721 pos = max_pos;
8722 if (max_pos == 3)
8723 template0 = get_template (curr_state->dfa_state, 3);
8724 else
8726 template1 = get_template (curr_state->dfa_state, 3);
8727 template0 = get_template (curr_state->dfa_state, 6);
8730 if (max_pos > 3 && template1 < 0)
8731 /* It may happen when we have the stop inside a bundle. */
8733 gcc_assert (pos <= 3);
8734 template1 = get_template (curr_state->dfa_state, 3);
8735 pos += 3;
8737 if (!asm_p)
8738 /* Emit nops after the current insn. */
8739 for (i = 0; i < curr_state->after_nops_num; i++)
8741 nop = gen_nop ();
8742 emit_insn_after (nop, insn);
8743 pos--;
8744 gcc_assert (pos >= 0);
8745 if (pos % 3 == 0)
8747 /* We are at the start of a bundle: emit the template
8748 (it should be defined). */
8749 gcc_assert (template0 >= 0);
8750 ia64_add_bundle_selector_before (template0, nop);
8751 /* If we have a two-bundle window, we make one bundle
8752 rotation.  Otherwise template0 will be undefined
8753 (negative value). */
8754 template0 = template1;
8755 template1 = -1;
8758 /* Move the position backward in the window.  A group barrier has
8759 no slot.  An asm insn takes a whole bundle. */
8760 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8761 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8762 && asm_noperands (PATTERN (insn)) < 0)
8763 pos--;
8764 /* Long insn takes 2 slots. */
8765 if (ia64_safe_type (insn) == TYPE_L)
8766 pos--;
8767 gcc_assert (pos >= 0);
8768 if (pos % 3 == 0
8769 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8770 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8771 && asm_noperands (PATTERN (insn)) < 0)
8773 /* The current insn is at the bundle start: emit the
8774 template. */
8775 gcc_assert (template0 >= 0);
8776 ia64_add_bundle_selector_before (template0, insn);
8777 b = PREV_INSN (insn);
8778 insn = b;
8779 /* See comment above in analogous place for emitting nops
8780 after the insn. */
8781 template0 = template1;
8782 template1 = -1;
8784 /* Emit nops before the current insn. */
8785 for (i = 0; i < curr_state->before_nops_num; i++)
8787 nop = gen_nop ();
8788 ia64_emit_insn_before (nop, insn);
8789 nop = PREV_INSN (insn);
8790 insn = nop;
8791 pos--;
8792 gcc_assert (pos >= 0);
8793 if (pos % 3 == 0)
8795 /* See comment above in analogous place for emitting nops
8796 after the insn. */
8797 gcc_assert (template0 >= 0);
8798 ia64_add_bundle_selector_before (template0, insn);
8799 b = PREV_INSN (insn);
8800 insn = b;
8801 template0 = template1;
8802 template1 = -1;
8807 #ifdef ENABLE_CHECKING
8809 /* Assert right calculation of middle_bundle_stops. */
8810 int num = best_state->middle_bundle_stops;
8811 bool start_bundle = true, end_bundle = false;
8813 for (insn = NEXT_INSN (prev_head_insn);
8814 insn && insn != tail;
8815 insn = NEXT_INSN (insn))
8817 if (!INSN_P (insn))
8818 continue;
8819 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8820 start_bundle = true;
8821 else
8823 rtx next_insn;
8825 for (next_insn = NEXT_INSN (insn);
8826 next_insn && next_insn != tail;
8827 next_insn = NEXT_INSN (next_insn))
8828 if (INSN_P (next_insn)
8829 && (ia64_safe_itanium_class (next_insn)
8830 != ITANIUM_CLASS_IGNORE
8831 || recog_memoized (next_insn)
8832 == CODE_FOR_bundle_selector)
8833 && GET_CODE (PATTERN (next_insn)) != USE
8834 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8835 break;
8837 end_bundle = next_insn == NULL_RTX
8838 || next_insn == tail
8839 || (INSN_P (next_insn)
8840 && recog_memoized (next_insn)
8841 == CODE_FOR_bundle_selector);
8842 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8843 && !start_bundle && !end_bundle
8844 && next_insn
8845 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8846 && asm_noperands (PATTERN (next_insn)) < 0)
8847 num--;
8849 start_bundle = false;
8853 gcc_assert (num == 0);
8855 #endif
8857 free (index_to_bundle_states);
8858 finish_bundle_state_table ();
8859 bundling_p = 0;
8860 dfa_clean_insn_cache ();
8863 /* The following function is called at the end of scheduling BB or
8864 EBB. After reload, it inserts stop bits and does insn bundling. */
8866 static void
8867 ia64_sched_finish (FILE *dump, int sched_verbose)
8869 if (sched_verbose)
8870 fprintf (dump, "// Finishing schedule.\n");
8871 if (!reload_completed)
8872 return;
8873 if (reload_completed)
8875 final_emit_insn_group_barriers (dump);
8876 bundling (dump, sched_verbose, current_sched_info->prev_head,
8877 current_sched_info->next_tail);
8878 if (sched_verbose && dump)
8879 fprintf (dump, "// finishing %d-%d\n",
8880 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8881 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8883 return;
8887 /* The following function inserts stop bits in scheduled BB or EBB. */
8889 static void
8890 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8892 rtx insn;
8893 int need_barrier_p = 0;
8894 int seen_good_insn = 0;
8895 rtx prev_insn = NULL_RTX;
8897 init_insn_group_barriers ();
8899 for (insn = NEXT_INSN (current_sched_info->prev_head);
8900 insn != current_sched_info->next_tail;
8901 insn = NEXT_INSN (insn))
8903 if (GET_CODE (insn) == BARRIER)
8905 rtx last = prev_active_insn (insn);
8907 if (! last)
8908 continue;
8909 if (GET_CODE (last) == JUMP_INSN
8910 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8911 last = prev_active_insn (last);
8912 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8913 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8915 init_insn_group_barriers ();
8916 seen_good_insn = 0;
8917 need_barrier_p = 0;
8918 prev_insn = NULL_RTX;
8920 else if (NONDEBUG_INSN_P (insn))
8922 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8924 init_insn_group_barriers ();
8925 seen_good_insn = 0;
8926 need_barrier_p = 0;
8927 prev_insn = NULL_RTX;
8929 else if (need_barrier_p || group_barrier_needed (insn)
8930 || (mflag_sched_stop_bits_after_every_cycle
8931 && GET_MODE (insn) == TImode
8932 && seen_good_insn))
8934 if (TARGET_EARLY_STOP_BITS)
8936 rtx last;
8938 for (last = insn;
8939 last != current_sched_info->prev_head;
8940 last = PREV_INSN (last))
8941 if (INSN_P (last) && GET_MODE (last) == TImode
8942 && stops_p [INSN_UID (last)])
8943 break;
8944 if (last == current_sched_info->prev_head)
8945 last = insn;
8946 last = prev_active_insn (last);
8947 if (last
8948 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8949 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8950 last);
8951 init_insn_group_barriers ();
8952 for (last = NEXT_INSN (last);
8953 last != insn;
8954 last = NEXT_INSN (last))
8955 if (INSN_P (last))
8957 group_barrier_needed (last);
8958 if (recog_memoized (last) >= 0
8959 && important_for_bundling_p (last))
8960 seen_good_insn = 1;
8963 else
8965 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8966 insn);
8967 init_insn_group_barriers ();
8968 seen_good_insn = 0;
8970 group_barrier_needed (insn);
8971 if (recog_memoized (insn) >= 0
8972 && important_for_bundling_p (insn))
8973 seen_good_insn = 1;
8974 prev_insn = NULL_RTX;
8976 else if (recog_memoized (insn) >= 0
8977 && important_for_bundling_p (insn))
8979 prev_insn = insn;
8980 seen_good_insn = 1;
8982 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8983 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8984 || asm_noperands (PATTERN (insn)) >= 0);
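/* A call or an asm insn sets NEED_BARRIER_P above, which forces a stop
   bit to be emitted before the next scheduled insn examined by this
   loop. */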
8991 /* The following function returns the number of insns examined by the
8992 first-cycle multipass DFA lookahead; a positive value enables it. */
8994 static int
8995 ia64_first_cycle_multipass_dfa_lookahead (void)
8997 return (reload_completed ? 6 : 4);
9000 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9002 static void
9003 ia64_init_dfa_pre_cycle_insn (void)
9005 if (temp_dfa_state == NULL)
9007 dfa_state_size = state_size ();
9008 temp_dfa_state = xmalloc (dfa_state_size);
9009 prev_cycle_state = xmalloc (dfa_state_size);
9011 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9012 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9013 recog_memoized (dfa_pre_cycle_insn);
9014 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9015 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9016 recog_memoized (dfa_stop_insn);
9019 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9020 used by the DFA insn scheduler. */
9022 static rtx
9023 ia64_dfa_pre_cycle_insn (void)
9025 return dfa_pre_cycle_insn;
9028 /* The following function returns TRUE if PRODUCER (of type ilog or
9029 ld) produces the address for CONSUMER (of type st or stf). */
9032 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9034 rtx dest, reg, mem;
9036 gcc_assert (producer && consumer);
9037 dest = ia64_single_set (producer);
9038 gcc_assert (dest);
9039 reg = SET_DEST (dest);
9040 gcc_assert (reg);
9041 if (GET_CODE (reg) == SUBREG)
9042 reg = SUBREG_REG (reg);
9043 gcc_assert (GET_CODE (reg) == REG);
9045 dest = ia64_single_set (consumer);
9046 gcc_assert (dest);
9047 mem = SET_DEST (dest);
9048 gcc_assert (mem && GET_CODE (mem) == MEM);
9049 return reg_mentioned_p (reg, mem);
9052 /* The following function returns TRUE if PRODUCER (of type ilog or
9053 ld) produces the address for CONSUMER (of type ld or fld). */
9056 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9058 rtx dest, src, reg, mem;
9060 gcc_assert (producer && consumer);
9061 dest = ia64_single_set (producer);
9062 gcc_assert (dest);
9063 reg = SET_DEST (dest);
9064 gcc_assert (reg);
9065 if (GET_CODE (reg) == SUBREG)
9066 reg = SUBREG_REG (reg);
9067 gcc_assert (GET_CODE (reg) == REG);
9069 src = ia64_single_set (consumer);
9070 gcc_assert (src);
9071 mem = SET_SRC (src);
9072 gcc_assert (mem);
9074 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9075 mem = XVECEXP (mem, 0, 0);
9076 else if (GET_CODE (mem) == IF_THEN_ELSE)
9077 /* ??? Is this bypass necessary for ld.c? */
9079 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9080 mem = XEXP (mem, 1);
9083 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9084 mem = XEXP (mem, 0);
9086 if (GET_CODE (mem) == UNSPEC)
9088 int c = XINT (mem, 1);
9090 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9091 || c == UNSPEC_LDSA);
9092 mem = XVECEXP (mem, 0, 0);
9095 /* Note that LO_SUM is used for GOT loads. */
9096 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9098 return reg_mentioned_p (reg, mem);
9101 /* The following function returns TRUE if INSN produces an address for a
9102 load/store insn.  We will place such insns into an M slot because that
9103 decreases their latency. */
9106 ia64_produce_address_p (rtx insn)
9108 return insn->call;
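/* The `call' flag of the insn is reused here as a scratch marker; it is
   presumably set for address producers by the dependence-evaluation
   scheduler hook elsewhere in this file rather than indicating a real
   call. */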
9112 /* Emit pseudo-ops for the assembler to describe predicate relations.
9113 At present this assumes that we only consider predicate pairs to
9114 be mutex, and that the assembler can deduce proper values from
9115 straight-line code. */
9117 static void
9118 emit_predicate_relation_info (void)
9120 basic_block bb;
9122 FOR_EACH_BB_REVERSE (bb)
9124 int r;
9125 rtx head = BB_HEAD (bb);
9127 /* We only need such notes at code labels. */
9128 if (GET_CODE (head) != CODE_LABEL)
9129 continue;
9130 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9131 head = NEXT_INSN (head);
9133 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9134 grabbing the entire block of predicate registers. */
9135 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9136 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9138 rtx p = gen_rtx_REG (BImode, r);
9139 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9140 if (head == BB_END (bb))
9141 BB_END (bb) = n;
9142 head = n;
9146 /* Look for conditional calls that do not return, and protect predicate
9147 relations around them. Otherwise the assembler will assume the call
9148 returns, and complain about uses of call-clobbered predicates after
9149 the call. */
9150 FOR_EACH_BB_REVERSE (bb)
9152 rtx insn = BB_HEAD (bb);
9154 while (1)
9156 if (GET_CODE (insn) == CALL_INSN
9157 && GET_CODE (PATTERN (insn)) == COND_EXEC
9158 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9160 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9161 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9162 if (BB_HEAD (bb) == insn)
9163 BB_HEAD (bb) = b;
9164 if (BB_END (bb) == insn)
9165 BB_END (bb) = a;
9168 if (insn == BB_END (bb))
9169 break;
9170 insn = NEXT_INSN (insn);
9175 /* Perform machine dependent operations on the rtl chain INSNS. */
9177 static void
9178 ia64_reorg (void)
9180 /* We are freeing block_for_insn in the toplev to keep compatibility
9181 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9182 compute_bb_for_insn ();
9184 /* If optimizing, we'll have split before scheduling. */
9185 if (optimize == 0)
9186 split_all_insns ();
9188 if (optimize && ia64_flag_schedule_insns2
9189 && dbg_cnt (ia64_sched2))
9191 timevar_push (TV_SCHED2);
9192 ia64_final_schedule = 1;
9194 initiate_bundle_states ();
9195 ia64_nop = make_insn_raw (gen_nop ());
9196 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9197 recog_memoized (ia64_nop);
9198 clocks_length = get_max_uid () + 1;
9199 stops_p = XCNEWVEC (char, clocks_length);
9201 if (ia64_tune == PROCESSOR_ITANIUM2)
9203 pos_1 = get_cpu_unit_code ("2_1");
9204 pos_2 = get_cpu_unit_code ("2_2");
9205 pos_3 = get_cpu_unit_code ("2_3");
9206 pos_4 = get_cpu_unit_code ("2_4");
9207 pos_5 = get_cpu_unit_code ("2_5");
9208 pos_6 = get_cpu_unit_code ("2_6");
9209 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9210 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9211 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9212 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9213 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9214 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9215 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9216 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9217 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9218 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9219 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9220 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9221 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9222 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9223 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9224 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9225 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9226 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9227 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9228 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9230 else
9232 pos_1 = get_cpu_unit_code ("1_1");
9233 pos_2 = get_cpu_unit_code ("1_2");
9234 pos_3 = get_cpu_unit_code ("1_3");
9235 pos_4 = get_cpu_unit_code ("1_4");
9236 pos_5 = get_cpu_unit_code ("1_5");
9237 pos_6 = get_cpu_unit_code ("1_6");
9238 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9239 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9240 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9241 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9242 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9243 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9244 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9245 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9246 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9247 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9248 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9249 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9250 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9251 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9252 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9253 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9254 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9255 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9256 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9257 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9260 if (flag_selective_scheduling2
9261 && !maybe_skip_selective_scheduling ())
9262 run_selective_scheduling ();
9263 else
9264 schedule_ebbs ();
9266 /* Redo the alignment computation, as it might have gone wrong. */
9267 compute_alignments ();
9269 /* We cannot reuse this one because it has been corrupted by the
9270 evil glat. */
9271 finish_bundle_states ();
9272 free (stops_p);
9273 stops_p = NULL;
9274 emit_insn_group_barriers (dump_file);
9276 ia64_final_schedule = 0;
9277 timevar_pop (TV_SCHED2);
9279 else
9280 emit_all_insn_group_barriers (dump_file);
9282 df_analyze ();
9284 /* A call must not be the last instruction in a function, so that the
9285 return address is still within the function, so that unwinding works
9286 properly. Note that IA-64 differs from dwarf2 on this point. */
9287 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9289 rtx insn;
9290 int saw_stop = 0;
9292 insn = get_last_insn ();
9293 if (! INSN_P (insn))
9294 insn = prev_active_insn (insn);
9295 if (insn)
9297 /* Skip over insns that expand to nothing. */
9298 while (GET_CODE (insn) == INSN
9299 && get_attr_empty (insn) == EMPTY_YES)
9301 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9302 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9303 saw_stop = 1;
9304 insn = prev_active_insn (insn);
9306 if (GET_CODE (insn) == CALL_INSN)
9308 if (! saw_stop)
9309 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9310 emit_insn (gen_break_f ());
9311 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9316 emit_predicate_relation_info ();
9318 if (ia64_flag_var_tracking)
9320 timevar_push (TV_VAR_TRACKING);
9321 variable_tracking_main ();
9322 timevar_pop (TV_VAR_TRACKING);
9324 df_finish_pass (false);
9327 /* Return true if REGNO is used by the epilogue. */
9330 ia64_epilogue_uses (int regno)
9332 switch (regno)
9334 case R_GR (1):
9335 /* With a call to a function in another module, we will write a new
9336 value to "gp". After returning from such a call, we need to make
9337 sure the function restores the original gp-value, even if the
9338 function itself does not use the gp anymore. */
9339 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9341 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9342 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9343 /* For functions defined with the syscall_linkage attribute, all
9344 input registers are marked as live at all function exits. This
9345 prevents the register allocator from using the input registers,
9346 which in turn makes it possible to restart a system call after
9347 an interrupt without having to save/restore the input registers.
9348 This also prevents kernel data from leaking to application code. */
9349 return lookup_attribute ("syscall_linkage",
9350 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9352 case R_BR (0):
9353 /* Conditional return patterns can't represent the use of `b0' as
9354 the return address, so we force the value live this way. */
9355 return 1;
9357 case AR_PFS_REGNUM:
9358 /* Likewise for ar.pfs, which is used by br.ret. */
9359 return 1;
9361 default:
9362 return 0;
9366 /* Return true if REGNO is used by the frame unwinder. */
9369 ia64_eh_uses (int regno)
9371 unsigned int r;
9373 if (! reload_completed)
9374 return 0;
9376 if (regno == 0)
9377 return 0;
9379 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9380 if (regno == current_frame_info.r[r]
9381 || regno == emitted_frame_related_regs[r])
9382 return 1;
9384 return 0;
9387 /* Return true if this goes in small data/bss. */
9389 /* ??? We could also support our own long data here, generating
9390 movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
9391 should make the code faster because there is one less load.  This also
9392 includes incomplete types which can't go in sdata/sbss. */
9394 static bool
9395 ia64_in_small_data_p (const_tree exp)
9397 if (TARGET_NO_SDATA)
9398 return false;
9400 /* We want to merge strings, so we never consider them small data. */
9401 if (TREE_CODE (exp) == STRING_CST)
9402 return false;
9404 /* Functions are never small data. */
9405 if (TREE_CODE (exp) == FUNCTION_DECL)
9406 return false;
9408 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9410 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9412 if (strcmp (section, ".sdata") == 0
9413 || strncmp (section, ".sdata.", 7) == 0
9414 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9415 || strcmp (section, ".sbss") == 0
9416 || strncmp (section, ".sbss.", 6) == 0
9417 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9418 return true;
9420 else
9422 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9424 /* If this is an incomplete type with size 0, then we can't put it
9425 in sdata because it might be too big when completed. */
9426 if (size > 0 && size <= ia64_section_threshold)
9427 return true;
9430 return false;
9433 /* Output assembly directives for prologue regions. */
9435 /* The current basic block number. */
9437 static bool last_block;
9439 /* True if we need a copy_state command at the start of the next block. */
9441 static bool need_copy_state;
9443 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9444 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9445 #endif
9447 /* Emit a debugging label after a call-frame-related insn. We'd
9448 rather output the label right away, but we'd have to output it
9449 after, not before, the instruction, and the instruction has not
9450 been output yet. So we emit the label after the insn, delete it to
9451 avoid introducing basic blocks, and mark it as preserved, such that
9452 it is still output, given that it is referenced in debug info. */
9454 static const char *
9455 ia64_emit_deleted_label_after_insn (rtx insn)
9457 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9458 rtx lb = gen_label_rtx ();
9459 rtx label_insn = emit_label_after (lb, insn);
9461 LABEL_PRESERVE_P (lb) = 1;
9463 delete_insn (label_insn);
9465 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9467 return xstrdup (label);
9470 /* Define the CFA after INSN with the steady-state definition. */
9472 static void
9473 ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9475 rtx fp = frame_pointer_needed
9476 ? hard_frame_pointer_rtx
9477 : stack_pointer_rtx;
9478 const char *label = ia64_emit_deleted_label_after_insn (insn);
9480 if (!frame)
9481 return;
9483 dwarf2out_def_cfa
9484 (label, REGNO (fp),
9485 ia64_initial_elimination_offset
9486 (REGNO (arg_pointer_rtx), REGNO (fp))
9487 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9490 /* The generic dwarf2 frame debug info generator does not define a
9491 separate region for the very end of the epilogue, so refrain from
9492 doing so in the IA64-specific code as well. */
9494 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9496 /* The function emits unwind directives for the start of an epilogue. */
9498 static void
9499 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9501 /* If this isn't the last block of the function, then we need to label the
9502 current state, and copy it back in at the start of the next block. */
9504 if (!last_block)
9506 if (unwind)
9507 fprintf (asm_out_file, "\t.label_state %d\n",
9508 ++cfun->machine->state_num);
9509 need_copy_state = true;
9512 if (unwind)
9513 fprintf (asm_out_file, "\t.restore sp\n");
9514 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9515 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9516 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9519 /* This function processes a SET pattern looking for specific patterns
9520 which result in emitting an assembly directive required for unwinding. */
9522 static int
9523 process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
9525 rtx src = SET_SRC (pat);
9526 rtx dest = SET_DEST (pat);
9527 int src_regno, dest_regno;
9529 /* Look for the ALLOC insn. */
9530 if (GET_CODE (src) == UNSPEC_VOLATILE
9531 && XINT (src, 1) == UNSPECV_ALLOC
9532 && GET_CODE (dest) == REG)
9534 dest_regno = REGNO (dest);
9536 /* If this is the final destination for ar.pfs, then this must
9537 be the alloc in the prologue. */
9538 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9540 if (unwind)
9541 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9542 ia64_dbx_register_number (dest_regno));
9544 else
9546 /* This must be an alloc before a sibcall. We must drop the
9547 old frame info. The easiest way to drop the old frame
9548 info is to ensure we had a ".restore sp" directive
9549 followed by a new prologue. If the procedure doesn't
9550 have a memory-stack frame, we'll issue a dummy ".restore
9551 sp" now. */
9552 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
9553 /* If we haven't done process_epilogue () yet, do it now. */
9554 process_epilogue (asm_out_file, insn, unwind, frame);
9555 if (unwind)
9556 fprintf (asm_out_file, "\t.prologue\n");
9558 return 1;
9561 /* Look for SP = .... */
9562 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9564 if (GET_CODE (src) == PLUS)
9566 rtx op0 = XEXP (src, 0);
9567 rtx op1 = XEXP (src, 1);
9569 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9571 if (INTVAL (op1) < 0)
9573 gcc_assert (!frame_pointer_needed);
9574 if (unwind)
9575 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9576 -INTVAL (op1));
9577 ia64_dwarf2out_def_steady_cfa (insn, frame);
9579 else
9580 process_epilogue (asm_out_file, insn, unwind, frame);
9582 else
9584 gcc_assert (GET_CODE (src) == REG
9585 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9586 process_epilogue (asm_out_file, insn, unwind, frame);
9589 return 1;
9592 /* Register move we need to look at. */
9593 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9595 src_regno = REGNO (src);
9596 dest_regno = REGNO (dest);
9598 switch (src_regno)
9600 case BR_REG (0):
9601 /* Saving return address pointer. */
9602 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9603 if (unwind)
9604 fprintf (asm_out_file, "\t.save rp, r%d\n",
9605 ia64_dbx_register_number (dest_regno));
9606 return 1;
9608 case PR_REG (0):
9609 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9610 if (unwind)
9611 fprintf (asm_out_file, "\t.save pr, r%d\n",
9612 ia64_dbx_register_number (dest_regno));
9613 return 1;
9615 case AR_UNAT_REGNUM:
9616 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9617 if (unwind)
9618 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9619 ia64_dbx_register_number (dest_regno));
9620 return 1;
9622 case AR_LC_REGNUM:
9623 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9624 if (unwind)
9625 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9626 ia64_dbx_register_number (dest_regno));
9627 return 1;
9629 case STACK_POINTER_REGNUM:
9630 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9631 && frame_pointer_needed);
9632 if (unwind)
9633 fprintf (asm_out_file, "\t.vframe r%d\n",
9634 ia64_dbx_register_number (dest_regno));
9635 ia64_dwarf2out_def_steady_cfa (insn, frame);
9636 return 1;
9638 default:
9639 /* Everything else should indicate being stored to memory. */
9640 gcc_unreachable ();
9644 /* Memory store we need to look at. */
9645 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
9647 long off;
9648 rtx base;
9649 const char *saveop;
9651 if (GET_CODE (XEXP (dest, 0)) == REG)
9653 base = XEXP (dest, 0);
9654 off = 0;
9656 else
9658 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9659 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9660 base = XEXP (XEXP (dest, 0), 0);
9661 off = INTVAL (XEXP (XEXP (dest, 0), 1));
9664 if (base == hard_frame_pointer_rtx)
9666 saveop = ".savepsp";
9667 off = - off;
9669 else
9671 gcc_assert (base == stack_pointer_rtx);
9672 saveop = ".savesp";
9675 src_regno = REGNO (src);
9676 switch (src_regno)
9678 case BR_REG (0):
9679 gcc_assert (!current_frame_info.r[reg_save_b0]);
9680 if (unwind)
9681 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
9682 return 1;
9684 case PR_REG (0):
9685 gcc_assert (!current_frame_info.r[reg_save_pr]);
9686 if (unwind)
9687 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
9688 return 1;
9690 case AR_LC_REGNUM:
9691 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9692 if (unwind)
9693 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9694 return 1;
9696 case AR_PFS_REGNUM:
9697 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9698 if (unwind)
9699 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9700 return 1;
9702 case AR_UNAT_REGNUM:
9703 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9704 if (unwind)
9705 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9706 return 1;
9708 case GR_REG (4):
9709 case GR_REG (5):
9710 case GR_REG (6):
9711 case GR_REG (7):
9712 if (unwind)
9713 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9714 1 << (src_regno - GR_REG (4)));
9715 return 1;
9717 case BR_REG (1):
9718 case BR_REG (2):
9719 case BR_REG (3):
9720 case BR_REG (4):
9721 case BR_REG (5):
9722 if (unwind)
9723 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9724 1 << (src_regno - BR_REG (1)));
9725 return 1;
9727 case FR_REG (2):
9728 case FR_REG (3):
9729 case FR_REG (4):
9730 case FR_REG (5):
9731 if (unwind)
9732 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9733 1 << (src_regno - FR_REG (2)));
9734 return 1;
9736 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9737 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9738 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9739 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9740 if (unwind)
9741 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9742 1 << (src_regno - FR_REG (12)));
9743 return 1;
9745 default:
9746 return 0;
9750 return 0;
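/* Editorial illustration (not from the original source): for a function
   whose alloc writes ar.pfs to r35, whose return pointer is saved in r34,
   and which allocates a 32-byte memory frame, the SETs handled above
   would produce unwind directives along the lines of

	.save ar.pfs, r35
	.save rp, r34
	.fframe 32

   interleaved with the corresponding prologue instructions; the exact
   registers and offsets depend on the frame layout computed by
   ia64_compute_frame_size.  */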
9754 /* This function looks at a single insn and emits any directives
9755 required to unwind this insn. */
9756 void
9757 process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9759 bool unwind = (flag_unwind_tables
9760 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9761 bool frame = dwarf2out_do_frame ();
9763 if (unwind || frame)
9765 rtx pat;
9767 if (NOTE_INSN_BASIC_BLOCK_P (insn))
9769 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9771 /* Restore unwind state from immediately before the epilogue. */
9772 if (need_copy_state)
9774 if (unwind)
9776 fprintf (asm_out_file, "\t.body\n");
9777 fprintf (asm_out_file, "\t.copy_state %d\n",
9778 cfun->machine->state_num);
9780 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9781 ia64_dwarf2out_def_steady_cfa (insn, frame);
9782 need_copy_state = false;
9786 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9787 return;
9789 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9790 if (pat)
9791 pat = XEXP (pat, 0);
9792 else
9793 pat = PATTERN (insn);
9795 switch (GET_CODE (pat))
9797 case SET:
9798 process_set (asm_out_file, pat, insn, unwind, frame);
9799 break;
9801 case PARALLEL:
9803 int par_index;
9804 int limit = XVECLEN (pat, 0);
9805 for (par_index = 0; par_index < limit; par_index++)
9807 rtx x = XVECEXP (pat, 0, par_index);
9808 if (GET_CODE (x) == SET)
9809 process_set (asm_out_file, x, insn, unwind, frame);
9811 break;
9814 default:
9815 gcc_unreachable ();
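/* Editorial illustration (not from the original source): for code that
   appears after an epilogue (a second return path, for instance), the
   need_copy_state handling above re-establishes the unwind state at the
   start of the next basic block with something like

	.body
	.copy_state 1

   where the state number matches a .label_state directive emitted just
   before that epilogue.  */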
9821 enum ia64_builtins
9823 IA64_BUILTIN_BSP,
9824 IA64_BUILTIN_COPYSIGNQ,
9825 IA64_BUILTIN_FABSQ,
9826 IA64_BUILTIN_FLUSHRS,
9827 IA64_BUILTIN_INFQ,
9828 IA64_BUILTIN_HUGE_VALQ
9831 void
9832 ia64_init_builtins (void)
9834 tree fpreg_type;
9835 tree float80_type;
9837 /* The __fpreg type. */
9838 fpreg_type = make_node (REAL_TYPE);
9839 TYPE_PRECISION (fpreg_type) = 82;
9840 layout_type (fpreg_type);
9841 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9843 /* The __float80 type. */
9844 float80_type = make_node (REAL_TYPE);
9845 TYPE_PRECISION (float80_type) = 80;
9846 layout_type (float80_type);
9847 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9849 /* The __float128 type. */
9850 if (!TARGET_HPUX)
9852 tree ftype, decl;
9853 tree float128_type = make_node (REAL_TYPE);
9855 TYPE_PRECISION (float128_type) = 128;
9856 layout_type (float128_type);
9857 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9859 /* TFmode support builtins. */
9860 ftype = build_function_type (float128_type, void_list_node);
9861 add_builtin_function ("__builtin_infq", ftype,
9862 IA64_BUILTIN_INFQ, BUILT_IN_MD,
9863 NULL, NULL_TREE);
9865 add_builtin_function ("__builtin_huge_valq", ftype,
9866 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9867 NULL, NULL_TREE);
9869 ftype = build_function_type_list (float128_type,
9870 float128_type,
9871 NULL_TREE);
9872 decl = add_builtin_function ("__builtin_fabsq", ftype,
9873 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9874 "__fabstf2", NULL_TREE);
9875 TREE_READONLY (decl) = 1;
9877 ftype = build_function_type_list (float128_type,
9878 float128_type,
9879 float128_type,
9880 NULL_TREE);
9881 decl = add_builtin_function ("__builtin_copysignq", ftype,
9882 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
9883 "__copysigntf3", NULL_TREE);
9884 TREE_READONLY (decl) = 1;
9886 else
9887 /* Under HPUX, this is a synonym for "long double". */
9888 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9889 "__float128");
9891 /* Fwrite on VMS is non-standard. */
9892 if (TARGET_ABI_OPEN_VMS)
9894 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
9895 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
9898 #define def_builtin(name, type, code) \
9899 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
9900 NULL, NULL_TREE)
9902 def_builtin ("__builtin_ia64_bsp",
9903 build_function_type (ptr_type_node, void_list_node),
9904 IA64_BUILTIN_BSP);
9906 def_builtin ("__builtin_ia64_flushrs",
9907 build_function_type (void_type_node, void_list_node),
9908 IA64_BUILTIN_FLUSHRS);
9910 #undef def_builtin
9912 if (TARGET_HPUX)
9914 if (built_in_decls [BUILT_IN_FINITE])
9915 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9916 "_Isfinite");
9917 if (built_in_decls [BUILT_IN_FINITEF])
9918 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9919 "_Isfinitef");
9920 if (built_in_decls [BUILT_IN_FINITEL])
9921 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9922 "_Isfinitef128");
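/* Editorial illustration (not from the original source): on a non-HP-UX
   target the registrations above make code such as

	__float80  e = 1.0w;
	__float128 q = __builtin_infq ();
	q = __builtin_copysignq (__builtin_fabsq (q), -1.0q);

   valid C; the 'q' and 'w' constant suffixes are wired up by
   ia64_c_mode_for_suffix further down in this file.  */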
9926 rtx
9927 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9928 enum machine_mode mode ATTRIBUTE_UNUSED,
9929 int ignore ATTRIBUTE_UNUSED)
9931 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9932 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9934 switch (fcode)
9936 case IA64_BUILTIN_BSP:
9937 if (! target || ! register_operand (target, DImode))
9938 target = gen_reg_rtx (DImode);
9939 emit_insn (gen_bsp_value (target));
9940 #ifdef POINTERS_EXTEND_UNSIGNED
9941 target = convert_memory_address (ptr_mode, target);
9942 #endif
9943 return target;
9945 case IA64_BUILTIN_FLUSHRS:
9946 emit_insn (gen_flushrs ());
9947 return const0_rtx;
9949 case IA64_BUILTIN_INFQ:
9950 case IA64_BUILTIN_HUGE_VALQ:
9952 REAL_VALUE_TYPE inf;
9953 rtx tmp;
9955 real_inf (&inf);
9956 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
9958 tmp = validize_mem (force_const_mem (mode, tmp));
9960 if (target == 0)
9961 target = gen_reg_rtx (mode);
9963 emit_move_insn (target, tmp);
9964 return target;
9967 case IA64_BUILTIN_FABSQ:
9968 case IA64_BUILTIN_COPYSIGNQ:
9969 return expand_call (exp, target, ignore);
9971 default:
9972 gcc_unreachable ();
9975 return NULL_RTX;
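/* Editorial illustration (not from the original source): the register
   stack builtins expanded above are typically used together by code that
   wants to walk the register backing store, e.g.

	__builtin_ia64_flushrs ();
	void *bsp = __builtin_ia64_bsp ();

   flushrs forces the dirty stacked registers out to memory and bsp then
   returns the current backing store pointer.  */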
9978 /* On HP-UX IA64, aggregate parameters are passed in the
9979 most significant bits of the stack slot.  */
9981 enum direction
9982 ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
9984 /* Exception to normal case for structures/unions/etc. */
9986 if (type && AGGREGATE_TYPE_P (type)
9987 && int_size_in_bytes (type) < UNITS_PER_WORD)
9988 return upward;
9990 /* Fall back to the default. */
9991 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
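/* Editorial illustration (not from the original source): under the rule
   above, a 3-byte struct passed on the stack is padded upward, so it
   occupies the lowest-addressed bytes of its 8-byte slot, which on
   big-endian HP-UX are the most significant bytes of that slot; scalars
   and aggregates of a full word or more fall through to the default
   padding.  */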
9994 /* Emit text to declare externally defined variables and functions, because
9995 the Intel assembler does not support undefined externals. */
9997 void
9998 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10000 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10001 set in order to avoid putting out names that are never really
10002 used. */
10003 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10005 /* maybe_assemble_visibility will return 1 if the assembler
10006 visibility directive is output. */
10007 int need_visibility = ((*targetm.binds_local_p) (decl)
10008 && maybe_assemble_visibility (decl));
10010 #ifdef DO_CRTL_NAMES
10011 DO_CRTL_NAMES;
10012 #endif
10014 /* GNU as does not need anything here, but the HP linker does
10015 need something for external functions. */
10016 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10017 && TREE_CODE (decl) == FUNCTION_DECL)
10018 (*targetm.asm_out.globalize_decl_name) (file, decl);
10019 else if (need_visibility && !TARGET_GNU_AS)
10020 (*targetm.asm_out.globalize_label) (file, name);
10024 /* Set the SImode div/mod functions; init_integral_libfuncs only initializes
10025 modes of word_mode and larger.  Rename the TFmode libfuncs using the
10026 HPUX conventions.  __divtf3 is used for XFmode; we need to keep it for
10027 backward compatibility. */
10029 static void
10030 ia64_init_libfuncs (void)
10032 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10033 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10034 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10035 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10037 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10038 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10039 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10040 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10041 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10043 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10044 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10045 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10046 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10047 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10048 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10050 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10051 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10052 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10053 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10054 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10056 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10057 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10058 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10059 /* HP-UX 11.23 libc does not have a function for unsigned
10060 SImode-to-TFmode conversion. */
10061 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
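/* Editorial illustration (not from the original source): with the table
   above in effect, a quad-precision multiply such as

	__float128 f (__float128 a, __float128 b) { return a * b; }

   compiles into a call to _U_Qfmpy, and a conversion from __float128 to
   double into a call to _U_Qfcnvff_quad_to_dbl.  */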
10064 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10066 static void
10067 ia64_hpux_init_libfuncs (void)
10069 ia64_init_libfuncs ();
10071 /* The HP SI millicode division and mod functions expect DI arguments.
10072 By turning them off completely we avoid using both libgcc and the
10073 non-standard millicode routines and use the HP DI millicode routines
10074 instead. */
10076 set_optab_libfunc (sdiv_optab, SImode, 0);
10077 set_optab_libfunc (udiv_optab, SImode, 0);
10078 set_optab_libfunc (smod_optab, SImode, 0);
10079 set_optab_libfunc (umod_optab, SImode, 0);
10081 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10082 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10083 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10084 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10086 /* HP-UX libc has TF min/max/abs routines in it. */
10087 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10088 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10089 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10091 /* ia64_expand_compare uses this. */
10092 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10094 /* These should never be used. */
10095 set_optab_libfunc (eq_optab, TFmode, 0);
10096 set_optab_libfunc (ne_optab, TFmode, 0);
10097 set_optab_libfunc (gt_optab, TFmode, 0);
10098 set_optab_libfunc (ge_optab, TFmode, 0);
10099 set_optab_libfunc (lt_optab, TFmode, 0);
10100 set_optab_libfunc (le_optab, TFmode, 0);
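/* Editorial illustration (not from the original source): because the
   SImode entries are cleared above, a 32-bit division on HP-UX falls
   back to the DImode libfuncs, so

	int f (int a, int b) { return a / b; }

   ends up calling the HP millicode routine __milli_divI rather than
   libgcc's __divsi3.  */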
10103 /* Rename the division and modulus functions in VMS. */
10105 static void
10106 ia64_vms_init_libfuncs (void)
10108 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10109 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10110 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10111 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10112 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10113 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10114 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10115 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10116 abort_libfunc = init_one_libfunc ("decc$abort");
10117 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10118 #ifdef MEM_LIBFUNCS_INIT
10119 MEM_LIBFUNCS_INIT;
10120 #endif
10123 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10124 the HPUX conventions. */
10126 static void
10127 ia64_sysv4_init_libfuncs (void)
10129 ia64_init_libfuncs ();
10131 /* These functions are not part of the HPUX TFmode interface. We
10132 use them instead of _U_Qfcmp, which doesn't work the way we
10133 expect. */
10134 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10135 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10136 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10137 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10138 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10139 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10141 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10142 glibc doesn't have them. */
10145 /* Use soft-fp. */
10147 static void
10148 ia64_soft_fp_init_libfuncs (void)
10152 static bool
10153 ia64_vms_valid_pointer_mode (enum machine_mode mode)
10155 return (mode == SImode || mode == DImode);
10158 /* For HPUX, it is illegal to have relocations in shared segments. */
10160 static int
10161 ia64_hpux_reloc_rw_mask (void)
10163 return 3;
10166 /* For others, relax this so that relocations to local data go in
10167 read-only segments, but we still cannot allow global relocations
10168 in read-only segments. */
10170 static int
10171 ia64_reloc_rw_mask (void)
10173 return flag_pic ? 3 : 2;
10176 /* Return the section to use for X. The only special thing we do here
10177 is to honor small data. */
10179 static section *
10180 ia64_select_rtx_section (enum machine_mode mode, rtx x,
10181 unsigned HOST_WIDE_INT align)
10183 if (GET_MODE_SIZE (mode) > 0
10184 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10185 && !TARGET_NO_SDATA)
10186 return sdata_section;
10187 else
10188 return default_elf_select_rtx_section (mode, x, align);
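/* Editorial illustration (not from the original source): with small data
   enabled, an 8-byte constant pool entry whose size is within
   ia64_section_threshold is routed to .sdata by the test above, where it
   can be reached with a short gp-relative access instead of a full
   64-bit address.  */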
10191 static unsigned int
10192 ia64_section_type_flags (tree decl, const char *name, int reloc)
10194 unsigned int flags = 0;
10196 if (strcmp (name, ".sdata") == 0
10197 || strncmp (name, ".sdata.", 7) == 0
10198 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10199 || strncmp (name, ".sdata2.", 8) == 0
10200 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10201 || strcmp (name, ".sbss") == 0
10202 || strncmp (name, ".sbss.", 6) == 0
10203 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10204 flags = SECTION_SMALL;
10206 #if TARGET_ABI_OPEN_VMS
10207 if (decl && DECL_ATTRIBUTES (decl)
10208 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10209 flags |= SECTION_VMS_OVERLAY;
10210 #endif
10212 flags |= default_section_type_flags (decl, name, reloc);
10213 return flags;
10216 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10217 structure type whose address should be passed
10218 in out0, rather than in r8. */
10220 static bool
10221 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10223 tree ret_type = TREE_TYPE (fntype);
10225 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10226 as the structure return address parameter, if the return value
10227 type has a non-trivial copy constructor or destructor. It is not
10228 clear if this same convention should be used for other
10229 programming languages. Until G++ 3.4, we incorrectly used r8 for
10230 these return values. */
10231 return (abi_version_at_least (2)
10232 && ret_type
10233 && TYPE_MODE (ret_type) == BLKmode
10234 && TREE_ADDRESSABLE (ret_type)
10235 && strcmp (lang_hooks.name, "GNU C++") == 0);
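/* Editorial illustration (not from the original source): for C++ such as

	struct S { ~S (); int x[4]; };
	S f ();

   S has a non-trivial destructor, so its return value is passed by
   invisible reference as an ordinary first argument; the predicate above
   then returns true and the address goes in out0 rather than r8.  */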
10238 /* Output the assembler code for a thunk function. THUNK_DECL is the
10239 declaration for the thunk function itself, FUNCTION is the decl for
10240 the target function. DELTA is an immediate constant offset to be
10241 added to THIS. If VCALL_OFFSET is nonzero, the word at
10242 *(*this + vcall_offset) should be added to THIS. */
10244 static void
10245 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10246 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10247 tree function)
10249 rtx this_rtx, insn, funexp;
10250 unsigned int this_parmno;
10251 unsigned int this_regno;
10252 rtx delta_rtx;
10254 reload_completed = 1;
10255 epilogue_completed = 1;
10257 /* Set things up as ia64_expand_prologue might. */
10258 last_scratch_gr_reg = 15;
10260 memset (&current_frame_info, 0, sizeof (current_frame_info));
10261 current_frame_info.spill_cfa_off = -16;
10262 current_frame_info.n_input_regs = 1;
10263 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10265 /* Mark the end of the (empty) prologue. */
10266 emit_note (NOTE_INSN_PROLOGUE_END);
10268 /* Figure out whether "this" will be the first parameter (the
10269 typical case) or the second parameter (as happens when the
10270 virtual function returns certain class objects). */
10271 this_parmno
10272 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10273 ? 1 : 0);
10274 this_regno = IN_REG (this_parmno);
10275 if (!TARGET_REG_NAMES)
10276 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10278 this_rtx = gen_rtx_REG (Pmode, this_regno);
10280 /* Apply the constant offset, if required. */
10281 delta_rtx = GEN_INT (delta);
10282 if (TARGET_ILP32)
10284 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10285 REG_POINTER (tmp) = 1;
10286 if (delta && satisfies_constraint_I (delta_rtx))
10288 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10289 delta = 0;
10291 else
10292 emit_insn (gen_ptr_extend (this_rtx, tmp));
10294 if (delta)
10296 if (!satisfies_constraint_I (delta_rtx))
10298 rtx tmp = gen_rtx_REG (Pmode, 2);
10299 emit_move_insn (tmp, delta_rtx);
10300 delta_rtx = tmp;
10302 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10305 /* Apply the offset from the vtable, if required. */
10306 if (vcall_offset)
10308 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10309 rtx tmp = gen_rtx_REG (Pmode, 2);
10311 if (TARGET_ILP32)
10313 rtx t = gen_rtx_REG (ptr_mode, 2);
10314 REG_POINTER (t) = 1;
10315 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10316 if (satisfies_constraint_I (vcall_offset_rtx))
10318 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10319 vcall_offset = 0;
10321 else
10322 emit_insn (gen_ptr_extend (tmp, t));
10324 else
10325 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10327 if (vcall_offset)
10329 if (!satisfies_constraint_J (vcall_offset_rtx))
10331 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10332 emit_move_insn (tmp2, vcall_offset_rtx);
10333 vcall_offset_rtx = tmp2;
10335 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10338 if (TARGET_ILP32)
10339 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10340 else
10341 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10343 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10346 /* Generate a tail call to the target function. */
10347 if (! TREE_USED (function))
10349 assemble_external (function);
10350 TREE_USED (function) = 1;
10352 funexp = XEXP (DECL_RTL (function), 0);
10353 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10354 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10355 insn = get_last_insn ();
10356 SIBLING_CALL_P (insn) = 1;
10358 /* Code generation for calls relies on splitting. */
10359 reload_completed = 1;
10360 epilogue_completed = 1;
10361 try_split (PATTERN (insn), insn, 0);
10363 emit_barrier ();
10365 /* Run just enough of rest_of_compilation to get the insns emitted.
10366 There's not really enough bulk here to make other passes such as
10367 instruction scheduling worthwhile.  Note that use_thunk calls
10368 assemble_start_function and assemble_end_function. */
10370 insn_locators_alloc ();
10371 emit_all_insn_group_barriers (NULL);
10372 insn = get_insns ();
10373 shorten_branches (insn);
10374 final_start_function (insn, file, 1);
10375 final (insn, file, 1);
10376 final_end_function ();
10378 reload_completed = 0;
10379 epilogue_completed = 0;
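/* Editorial illustration (not from the original source): a thunk with
   DELTA = -16 and no vcall offset reduces to an adjustment of the
   incoming "this" pointer followed by a sibcall, roughly

	adds r32 = -16, r32
	br.sptk.many _ZN1D1fEv#

   (register number, branch completers and mangled name are made up);
   the vcall_offset path adds vtable loads before the branch.  */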
10382 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10384 static rtx
10385 ia64_struct_value_rtx (tree fntype,
10386 int incoming ATTRIBUTE_UNUSED)
10388 if (TARGET_ABI_OPEN_VMS ||
10389 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10390 return NULL_RTX;
10391 return gen_rtx_REG (Pmode, GR_REG (8));
10394 static bool
10395 ia64_scalar_mode_supported_p (enum machine_mode mode)
10397 switch (mode)
10399 case QImode:
10400 case HImode:
10401 case SImode:
10402 case DImode:
10403 case TImode:
10404 return true;
10406 case SFmode:
10407 case DFmode:
10408 case XFmode:
10409 case RFmode:
10410 return true;
10412 case TFmode:
10413 return true;
10415 default:
10416 return false;
10420 static bool
10421 ia64_vector_mode_supported_p (enum machine_mode mode)
10423 switch (mode)
10425 case V8QImode:
10426 case V4HImode:
10427 case V2SImode:
10428 return true;
10430 case V2SFmode:
10431 return true;
10433 default:
10434 return false;
10438 /* Implement the FUNCTION_PROFILER macro. */
10440 void
10441 ia64_output_function_profiler (FILE *file, int labelno)
10443 bool indirect_call;
10445 /* If the function needs a static chain and the static chain
10446 register is r15, we use an indirect call so as to bypass
10447 the PLT stub in case the executable is dynamically linked,
10448 because the stub clobbers r15 as per 5.3.6 of the psABI.
10449 We don't need to do that in the non-canonical PIC modes (-mno-pic or -mauto-pic). */
10451 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10453 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10454 indirect_call = true;
10456 else
10457 indirect_call = false;
10459 if (TARGET_GNU_AS)
10460 fputs ("\t.prologue 4, r40\n", file);
10461 else
10462 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10463 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10465 if (NO_PROFILE_COUNTERS)
10466 fputs ("\tmov out3 = r0\n", file);
10467 else
10469 char buf[20];
10470 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10472 if (TARGET_AUTO_PIC)
10473 fputs ("\tmovl out3 = @gprel(", file);
10474 else
10475 fputs ("\taddl out3 = @ltoff(", file);
10476 assemble_name (file, buf);
10477 if (TARGET_AUTO_PIC)
10478 fputs (")\n", file);
10479 else
10480 fputs ("), r1\n", file);
10483 if (indirect_call)
10484 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10485 fputs ("\t;;\n", file);
10487 fputs ("\t.save rp, r42\n", file);
10488 fputs ("\tmov out2 = b0\n", file);
10489 if (indirect_call)
10490 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10491 fputs ("\t.body\n", file);
10492 fputs ("\tmov out1 = r1\n", file);
10493 if (indirect_call)
10495 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10496 fputs ("\tmov b6 = r16\n", file);
10497 fputs ("\tld8 r1 = [r14]\n", file);
10498 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10500 else
10501 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
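/* Editorial illustration (not from the original source): in the common
   direct-call case (GNU as, canonical PIC, no r15 static chain, profile
   counters enabled) the strings above assemble to

	.prologue 4, r40
	alloc out0 = ar.pfs, 8, 0, 4, 0
	addl out3 = @ltoff(.LP0), r1
	;;
	.save rp, r42
	mov out2 = b0
	.body
	mov out1 = r1
	br.call.sptk.many b0 = _mcount
	;;

   with .LP0 standing for the per-call-site counter label.  */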
10504 static GTY(()) rtx mcount_func_rtx;
10505 static rtx
10506 gen_mcount_func_rtx (void)
10508 if (!mcount_func_rtx)
10509 mcount_func_rtx = init_one_libfunc ("_mcount");
10510 return mcount_func_rtx;
10513 void
10514 ia64_profile_hook (int labelno)
10516 rtx label, ip;
10518 if (NO_PROFILE_COUNTERS)
10519 label = const0_rtx;
10520 else
10522 char buf[30];
10523 const char *label_name;
10524 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10525 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10526 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10527 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10529 ip = gen_reg_rtx (Pmode);
10530 emit_insn (gen_ip_value (ip));
10531 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10532 VOIDmode, 3,
10533 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10534 ip, Pmode,
10535 label, Pmode);
10538 /* Return the mangling of TYPE if it is an extended fundamental type. */
10540 static const char *
10541 ia64_mangle_type (const_tree type)
10543 type = TYPE_MAIN_VARIANT (type);
10545 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10546 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10547 return NULL;
10549 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10550 mangled as "e". */
10551 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10552 return "g";
10553 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10554 an extended mangling. Elsewhere, "e" is available since long
10555 double is 80 bits. */
10556 if (TYPE_MODE (type) == XFmode)
10557 return TARGET_HPUX ? "u9__float80" : "e";
10558 if (TYPE_MODE (type) == RFmode)
10559 return "u7__fpreg";
10560 return NULL;
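/* Editorial illustration (not from the original source): under the rules
   above, "void f (__float128)" mangles as _Z1fg on GNU/Linux but uses
   the long-double mangling on HP-UX, while "void g (__float80)" becomes
   _Z1ge on GNU/Linux and _Z1gu9__float80 on HP-UX.  */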
10563 /* Return the diagnostic message string if conversion from FROMTYPE to
10564 TOTYPE is not allowed, NULL otherwise. */
10565 static const char *
10566 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10568 /* Reject nontrivial conversion to or from __fpreg. */
10569 if (TYPE_MODE (fromtype) == RFmode
10570 && TYPE_MODE (totype) != RFmode
10571 && TYPE_MODE (totype) != VOIDmode)
10572 return N_("invalid conversion from %<__fpreg%>");
10573 if (TYPE_MODE (totype) == RFmode
10574 && TYPE_MODE (fromtype) != RFmode)
10575 return N_("invalid conversion to %<__fpreg%>");
10576 return NULL;
10579 /* Return the diagnostic message string if the unary operation OP is
10580 not permitted on TYPE, NULL otherwise. */
10581 static const char *
10582 ia64_invalid_unary_op (int op, const_tree type)
10584 /* Reject operations on __fpreg other than unary + or &. */
10585 if (TYPE_MODE (type) == RFmode
10586 && op != CONVERT_EXPR
10587 && op != ADDR_EXPR)
10588 return N_("invalid operation on %<__fpreg%>");
10589 return NULL;
10592 /* Return the diagnostic message string if the binary operation OP is
10593 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10594 static const char *
10595 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10597 /* Reject operations on __fpreg. */
10598 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10599 return N_("invalid operation on %<__fpreg%>");
10600 return NULL;
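/* Editorial illustration (not from the original source): the three hooks
   above reject code such as

	__fpreg r;
	double d = r + 1.0;

   with "invalid operation on '__fpreg'"; plain copies, unary '+' and
   taking the address remain allowed.  */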
10603 /* Implement overriding of the optimization options. */
10604 void
10605 ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10606 int size ATTRIBUTE_UNUSED)
10608 /* Let the scheduler form additional regions. */
10609 set_param_value ("max-sched-extend-regions-iters", 2);
10611 /* Set the default values for cache-related parameters. */
10612 set_param_value ("simultaneous-prefetches", 6);
10613 set_param_value ("l1-cache-line-size", 32);
10615 set_param_value ("sched-mem-true-dep-cost", 4);
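/* Editorial illustration (not from the original source): the defaults set
   above correspond to

	--param max-sched-extend-regions-iters=2
	--param simultaneous-prefetches=6
	--param l1-cache-line-size=32
	--param sched-mem-true-dep-cost=4

   on the command line; an explicit --param is expected to override them.  */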
10618 /* HP-UX version_id attribute.
10619 For object foo, if the version_id is set to 1234, put out an alias
10620 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
10621 other than an alias statement because it is an illegal symbol name. */
10623 static tree
10624 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10625 tree name ATTRIBUTE_UNUSED,
10626 tree args,
10627 int flags ATTRIBUTE_UNUSED,
10628 bool *no_add_attrs)
10630 tree arg = TREE_VALUE (args);
10632 if (TREE_CODE (arg) != STRING_CST)
10634 error ("version attribute is not a string");
10635 *no_add_attrs = true;
10636 return NULL_TREE;
10638 return NULL_TREE;
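/* Editorial illustration (not from the original source): the attribute is
   meant for HP-UX style symbol versioning, e.g.

	extern int foo (int) __attribute__ ((version_id ("1234")));

   which directs the compiler to emit '.alias foo "foo{1234}"' as
   described in the comment above.  */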
10641 /* Target hook for c_mode_for_suffix. */
10643 static enum machine_mode
10644 ia64_c_mode_for_suffix (char suffix)
10646 if (suffix == 'q')
10647 return TFmode;
10648 if (suffix == 'w')
10649 return XFmode;
10651 return VOIDmode;
10654 static enum machine_mode
10655 ia64_promote_function_mode (const_tree type,
10656 enum machine_mode mode,
10657 int *punsignedp,
10658 const_tree funtype,
10659 int for_return)
10661 /* Special processing required for OpenVMS ... */
10663 if (!TARGET_ABI_OPEN_VMS)
10664 return default_promote_function_mode(type, mode, punsignedp, funtype,
10665 for_return);
10667 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10668 HP OpenVMS I64 Version 8.2EFT,
10669 chapter 4 "OpenVMS I64 Conventions"
10670 section 4.7 "Procedure Linkage"
10671 subsection 4.7.5.2, "Normal Register Parameters"
10673 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10674 values passed in registers are zero-filled; signed integral values as
10675 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10676 For all other types passed in the general registers, unused bits are
10677 undefined." */
10679 if (!AGGREGATE_TYPE_P (type)
10680 && GET_MODE_CLASS (mode) == MODE_INT
10681 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10683 if (mode == SImode)
10684 *punsignedp = 0;
10685 return DImode;
10687 else
10688 return promote_mode (type, mode, punsignedp);
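/* Editorial illustration (not from the original source): under the VMS
   rule above an "unsigned short" argument is still zero-extended to
   64 bits, but an "unsigned int" (SImode) argument has *punsignedp
   forced to 0 and is therefore sign-extended to DImode, matching the
   calling standard quoted above.  */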
10691 static GTY(()) rtx ia64_dconst_0_5_rtx;
9693 rtx
9694 ia64_dconst_0_5 (void)
10696 if (! ia64_dconst_0_5_rtx)
10698 REAL_VALUE_TYPE rv;
10699 real_from_string (&rv, "0.5");
10700 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10702 return ia64_dconst_0_5_rtx;
10705 static GTY(()) rtx ia64_dconst_0_375_rtx;
9707 rtx
9708 ia64_dconst_0_375 (void)
10710 if (! ia64_dconst_0_375_rtx)
10712 REAL_VALUE_TYPE rv;
10713 real_from_string (&rv, "0.375");
10714 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10716 return ia64_dconst_0_375_rtx;
10720 #include "gt-ia64.h"