/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
enum ia64_frame_regs
{
  reg_fp,
  reg_save_b0,
  reg_save_pr,
  reg_save_ar_pfs,
  reg_save_ar_unat,
  reg_save_ar_lc,
  reg_save_gp,
  number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	  /* size of the stack frame, not including
				     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	  /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;		  /* mask of saved registers.  */
  unsigned int gr_used_mask;	  /* mask of registers in use as gr spill
				     registers or long-term scratches.  */
  int n_spilled;		  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		  /* number of input registers used.  */
  int n_local_regs;		  /* number of local registers used.  */
  int n_output_regs;		  /* number of output registers used.  */
  int n_rotate_regs;		  /* number of rotating registers used.  */

  char need_regstk;		  /* true if a .regstk directive needed.  */
  char initialized;		  /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
						     enum machine_mode,
						     int *,
						     const_tree,
						     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",	       1, 1, true,  false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true,  false, false, ia64_vms_common_object_attribute},
#endif
  { "version_id",      1, 1, true,  false, false,
    ia64_handle_version_id_attribute },
  { NULL,	       0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
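
/* Illustrative note (not part of the original source): a declaration such as

     extern int counter __attribute__ ((model ("small")));

   is the form this handler accepts.  It places COUNTER in the small address
   area (addressable by "addl", -2MB < addr < 2MB, as described by the
   ia64_addr_area enum above).  */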
/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;
  if (! DECL_P (decl))
    abort ();

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* Since the common_object attribute sets DECL_SECTION_NAME, check that
     field before looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /*  Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	       size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}

/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
				tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}
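
/* Illustrative note: a direct memory-to-memory move such as
   (set (mem:DI A) (mem:DI B)) is rejected above, which is what makes
   ia64_expand_move below force the source into a register first.  */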
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
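
/* Worked example (illustrative): ROP = 0xff0 with RSHIFT = 4 becomes 0xff
   after the shift, and exact_log2 (0x100) returns a field length of 8.
   A non-contiguous mask such as 0x5 gives exact_log2 (6) == -1, i.e.
   failure.  */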
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
	  || GET_MODE (x) == DFmode)
	return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, GET_MODE (op))
	      || function_operand (op, GET_MODE (op)))
	    return true;
	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      return false;

    case CONST_VECTOR:
      {
	enum machine_mode mode = GET_MODE (x);

	if (mode == V2SFmode)
	  return satisfies_constraint_Y (x);

	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		&& GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;
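
	  /* Split the constant offset into LO, the low 14 bits sign-extended
	     into [-0x2000, 0x1fff], and HI, the remainder, so HI + LO equals
	     the original offset.  LO is added back by the final add emitted
	     below when ADDEND is nonzero.  */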
	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
	}
    }

  return true;
}
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode, 2, tga_op1,
					 Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  REAL_VALUE_TYPE r;
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
	  real_to_target (l, &r, TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16, 0);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given an SNaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     an SNaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	     Expanders for buneq etc. would have to be added to ia64.md
	     for this to be useful.  */
	default: gcc_unreachable ();
	}

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (ncode, BImode,
					      ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = GEN_INT (0x80000000);
	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case V8QImode:
	case V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, op0, op1)));

	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
1777 /* Emit an integral vector conditional move. */
1779 void
1780 ia64_expand_vecint_cmov (rtx operands[])
1782 enum machine_mode mode = GET_MODE (operands[0]);
1783 enum rtx_code code = GET_CODE (operands[3]);
1784 bool negate;
1785 rtx cmp, x, ot, of;
1787 cmp = gen_reg_rtx (mode);
1788 negate = ia64_expand_vecint_compare (code, mode, cmp,
1789 operands[4], operands[5]);
1791 ot = operands[1+negate];
1792 of = operands[2-negate];
1794 if (ot == CONST0_RTX (mode))
1796 if (of == CONST0_RTX (mode))
1798 emit_move_insn (operands[0], ot);
1799 return;
1802 x = gen_rtx_NOT (mode, cmp);
1803 x = gen_rtx_AND (mode, x, of);
1804 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1806 else if (of == CONST0_RTX (mode))
1808 x = gen_rtx_AND (mode, cmp, ot);
1809 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1811 else
1813 rtx t, f;
1815 t = gen_reg_rtx (mode);
1816 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1817 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1819 f = gen_reg_rtx (mode);
1820 x = gen_rtx_NOT (mode, cmp);
1821 x = gen_rtx_AND (mode, x, operands[2-negate]);
1822 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1824 x = gen_rtx_IOR (mode, t, f);
1825 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
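/* Illustrative sketch, not part of the original source: the expansion above
   depends on the vector comparison producing an all-ones or all-zeros mask
   per element, so the conditional move reduces to bitwise selection,

       dest = (mask & on_true) | (~mask & on_false)

   with the two AND-only special cases when either arm is zero.  A scalar
   version, with made-up names:  */
static unsigned long
example_mask_select (unsigned long mask, unsigned long on_true,
		     unsigned long on_false)
{
  /* MASK is assumed to be either all ones or all zeros.  */
  return (mask & on_true) | (~mask & on_false);
}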
1829 /* Emit an integral vector min or max operation. Return true if all done. */
1831 bool
1832 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1833 rtx operands[])
1835 rtx xops[6];
1837 /* These four combinations are supported directly. */
1838 if (mode == V8QImode && (code == UMIN || code == UMAX))
1839 return false;
1840 if (mode == V4HImode && (code == SMIN || code == SMAX))
1841 return false;
1843 /* This combination can be implemented with only saturating subtraction. */
1844 if (mode == V4HImode && code == UMAX)
1846 rtx x, tmp = gen_reg_rtx (mode);
1848 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1849 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1851 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1852 return true;
1855 /* Everything else implemented via vector comparisons. */
1856 xops[0] = operands[0];
1857 xops[4] = xops[1] = operands[1];
1858 xops[5] = xops[2] = operands[2];
1860 switch (code)
1862 case UMIN:
1863 code = LTU;
1864 break;
1865 case UMAX:
1866 code = GTU;
1867 break;
1868 case SMIN:
1869 code = LT;
1870 break;
1871 case SMAX:
1872 code = GT;
1873 break;
1874 default:
1875 gcc_unreachable ();
1877 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1879 ia64_expand_vecint_cmov (xops);
1880 return true;
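/* Illustrative sketch, not part of the original source: the V4HImode UMAX
   special case above relies on the identity

       umax (a, b) = (a -sat b) + b

   where "-sat" is unsigned saturating subtraction.  Scalar check, with a
   made-up name:  */
static unsigned short
example_umax_via_ussub (unsigned short a, unsigned short b)
{
  unsigned short d = a > b ? (unsigned short) (a - b) : 0;  /* a -sat b */
  return (unsigned short) (d + b);    /* a >= b gives a, otherwise b */
}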
1883 /* Emit an integral vector widening sum operation. */
1885 void
1886 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1888 rtx l, h, x, s;
1889 enum machine_mode wmode, mode;
1890 rtx (*unpack_l) (rtx, rtx, rtx);
1891 rtx (*unpack_h) (rtx, rtx, rtx);
1892 rtx (*plus) (rtx, rtx, rtx);
1894 wmode = GET_MODE (operands[0]);
1895 mode = GET_MODE (operands[1]);
1897 switch (mode)
1899 case V8QImode:
1900 unpack_l = gen_unpack1_l;
1901 unpack_h = gen_unpack1_h;
1902 plus = gen_addv4hi3;
1903 break;
1904 case V4HImode:
1905 unpack_l = gen_unpack2_l;
1906 unpack_h = gen_unpack2_h;
1907 plus = gen_addv2si3;
1908 break;
1909 default:
1910 gcc_unreachable ();
1913 /* Fill in x with the sign extension of each element in op1. */
1914 if (unsignedp)
1915 x = CONST0_RTX (mode);
1916 else
1918 bool neg;
1920 x = gen_reg_rtx (mode);
1922 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1923 CONST0_RTX (mode));
1924 gcc_assert (!neg);
1927 l = gen_reg_rtx (wmode);
1928 h = gen_reg_rtx (wmode);
1929 s = gen_reg_rtx (wmode);
1931 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1932 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1933 emit_insn (plus (s, l, operands[2]));
1934 emit_insn (plus (operands[0], h, s));
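/* Illustrative sketch, not part of the original source: reference semantics
   of the widening-sum expansion above for the V8QImode -> V4HImode case.
   Each narrow element is zero- or sign-extended and added into one lane of
   the wide accumulator; which lane receives which element does not matter,
   because the vectorizer only uses this pattern inside a reduction, so the
   lane assignment below is just one possible choice.  */
static void
example_widen_sum_v8qi (short acc[4], const signed char in[8], int unsignedp)
{
  int i;

  for (i = 0; i < 8; i++)
    {
      short w = unsignedp ? (short) (unsigned char) in[i] : (short) in[i];
      acc[i % 4] = (short) (acc[i % 4] + w);
    }
}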
1937 /* Emit a signed or unsigned V8QI dot product operation. */
1939 void
1940 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1942 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1944 /* Fill in x1 and x2 with the sign extension of each element. */
1945 if (unsignedp)
1946 x1 = x2 = CONST0_RTX (V8QImode);
1947 else
1949 bool neg;
1951 x1 = gen_reg_rtx (V8QImode);
1952 x2 = gen_reg_rtx (V8QImode);
1954 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1955 CONST0_RTX (V8QImode));
1956 gcc_assert (!neg);
1957 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1958 CONST0_RTX (V8QImode));
1959 gcc_assert (!neg);
1962 l1 = gen_reg_rtx (V4HImode);
1963 l2 = gen_reg_rtx (V4HImode);
1964 h1 = gen_reg_rtx (V4HImode);
1965 h2 = gen_reg_rtx (V4HImode);
1967 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1968 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1969 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1970 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1972 p1 = gen_reg_rtx (V2SImode);
1973 p2 = gen_reg_rtx (V2SImode);
1974 p3 = gen_reg_rtx (V2SImode);
1975 p4 = gen_reg_rtx (V2SImode);
1976 emit_insn (gen_pmpy2_r (p1, l1, l2));
1977 emit_insn (gen_pmpy2_l (p2, l1, l2));
1978 emit_insn (gen_pmpy2_r (p3, h1, h2));
1979 emit_insn (gen_pmpy2_l (p4, h1, h2));
1981 s1 = gen_reg_rtx (V2SImode);
1982 s2 = gen_reg_rtx (V2SImode);
1983 s3 = gen_reg_rtx (V2SImode);
1984 emit_insn (gen_addv2si3 (s1, p1, p2));
1985 emit_insn (gen_addv2si3 (s2, p3, p4));
1986 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1987 emit_insn (gen_addv2si3 (operands[0], s2, s3));
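/* Illustrative sketch, not part of the original source: reference semantics
   of the V8QImode dot-product expansion above.  Each pair of byte elements
   is zero- or sign-extended, multiplied exactly, and the products are
   summed into the two 32-bit accumulator lanes; as with the widening sum,
   the lane assignment is a don't-care because the result only feeds a
   reduction.  */
static void
example_dot_prod_v8qi (int acc[2], const signed char a[8],
		       const signed char b[8], int unsignedp)
{
  int i;

  for (i = 0; i < 8; i++)
    {
      int ai = unsignedp ? (int) (unsigned char) a[i] : (int) a[i];
      int bi = unsignedp ? (int) (unsigned char) b[i] : (int) b[i];
      acc[i % 2] += ai * bi;
    }
}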
1990 /* Emit the appropriate sequence for a call. */
1992 void
1993 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1994 int sibcall_p)
1996 rtx insn, b0;
1998 addr = XEXP (addr, 0);
1999 addr = convert_memory_address (DImode, addr);
2000 b0 = gen_rtx_REG (DImode, R_BR (0));
2002 /* ??? Should do this for functions known to bind local too. */
2003 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2005 if (sibcall_p)
2006 insn = gen_sibcall_nogp (addr);
2007 else if (! retval)
2008 insn = gen_call_nogp (addr, b0);
2009 else
2010 insn = gen_call_value_nogp (retval, addr, b0);
2011 insn = emit_call_insn (insn);
2013 else
2015 if (sibcall_p)
2016 insn = gen_sibcall_gp (addr);
2017 else if (! retval)
2018 insn = gen_call_gp (addr, b0);
2019 else
2020 insn = gen_call_value_gp (retval, addr, b0);
2021 insn = emit_call_insn (insn);
2023 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2026 if (sibcall_p)
2027 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2029 if (TARGET_ABI_OPEN_VMS)
2030 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2031 gen_rtx_REG (DImode, GR_REG (25)));
2034 static void
2035 reg_emitted (enum ia64_frame_regs r)
2037 if (emitted_frame_related_regs[r] == 0)
2038 emitted_frame_related_regs[r] = current_frame_info.r[r];
2039 else
2040 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2043 static int
2044 get_reg (enum ia64_frame_regs r)
2046 reg_emitted (r);
2047 return current_frame_info.r[r];
2050 static bool
2051 is_emitted (int regno)
2053 unsigned int r;
2055 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2056 if (emitted_frame_related_regs[r] == regno)
2057 return true;
2058 return false;
2061 void
2062 ia64_reload_gp (void)
2064 rtx tmp;
2066 if (current_frame_info.r[reg_save_gp])
2068 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2070 else
2072 HOST_WIDE_INT offset;
2073 rtx offset_r;
2075 offset = (current_frame_info.spill_cfa_off
2076 + current_frame_info.spill_size);
2077 if (frame_pointer_needed)
2079 tmp = hard_frame_pointer_rtx;
2080 offset = -offset;
2082 else
2084 tmp = stack_pointer_rtx;
2085 offset = current_frame_info.total_size - offset;
2088 offset_r = GEN_INT (offset);
2089 if (satisfies_constraint_I (offset_r))
2090 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2091 else
2093 emit_move_insn (pic_offset_table_rtx, offset_r);
2094 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2095 pic_offset_table_rtx, tmp));
2098 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2101 emit_move_insn (pic_offset_table_rtx, tmp);
2104 void
2105 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2106 rtx scratch_b, int noreturn_p, int sibcall_p)
2108 rtx insn;
2109 bool is_desc = false;
2111 /* If we find we're calling through a register, then we're actually
2112 calling through a descriptor, so load up the values. */
2113 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2115 rtx tmp;
2116 bool addr_dead_p;
2118 /* ??? We are currently constrained to *not* use peep2, because
2119 we can legitimately change the global lifetime of the GP
2120 (in the form of killing where previously live). This is
2121 because a call through a descriptor doesn't use the previous
2122 value of the GP, while a direct call does, and we do not
2123 commit to either form until the split here.
2125 That said, this means that we lack precise life info for
2126 whether ADDR is dead after this call. This is not terribly
2127 important, since we can fix things up essentially for free
2128 with the POST_DEC below, but it's nice to not use it when we
2129 can immediately tell it's not necessary. */
2130 addr_dead_p = ((noreturn_p || sibcall_p
2131 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2132 REGNO (addr)))
2133 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2135 /* Load the code address into scratch_b. */
2136 tmp = gen_rtx_POST_INC (Pmode, addr);
2137 tmp = gen_rtx_MEM (Pmode, tmp);
2138 emit_move_insn (scratch_r, tmp);
2139 emit_move_insn (scratch_b, scratch_r);
2141 /* Load the GP address. If ADDR is not dead here, then we must
2142 revert the change made above via the POST_INCREMENT. */
2143 if (!addr_dead_p)
2144 tmp = gen_rtx_POST_DEC (Pmode, addr);
2145 else
2146 tmp = addr;
2147 tmp = gen_rtx_MEM (Pmode, tmp);
2148 emit_move_insn (pic_offset_table_rtx, tmp);
2150 is_desc = true;
2151 addr = scratch_b;
2154 if (sibcall_p)
2155 insn = gen_sibcall_nogp (addr);
2156 else if (retval)
2157 insn = gen_call_value_nogp (retval, addr, retaddr);
2158 else
2159 insn = gen_call_nogp (addr, retaddr);
2160 emit_call_insn (insn);
2162 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2163 ia64_reload_gp ();
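/* Illustrative sketch, not part of the original source: the descriptor
   layout assumed by the split above.  An indirect call on ia64 goes through
   a two-word function descriptor rather than a raw code address; the code
   loads the entry point (post-incrementing the pointer) and then the new
   gp.  The struct and function names below are made up.  */
struct example_ia64_fdesc
{
  unsigned long entry;   /* code address, ends up in scratch_b above */
  unsigned long gp;      /* global pointer, ends up in r1 above */
};

static unsigned long
example_load_fdesc (const struct example_ia64_fdesc *fd, unsigned long *new_gp)
{
  unsigned long entry = fd->entry;   /* first ld8, address post-incremented */
  *new_gp = fd->gp;                  /* second ld8 fetches the gp word */
  return entry;
}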
2166 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2168 This differs from the generic code in that we know about the zero-extending
2169 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2170 also know that ld.acq+cmpxchg.rel equals a full barrier.
2172 The loop we want to generate looks like
2174 cmp_reg = mem;
2175 label:
2176 old_reg = cmp_reg;
2177 new_reg = cmp_reg op val;
2178 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2179 if (cmp_reg != old_reg)
2180 goto label;
2182 Note that we only do the plain load from memory once. Subsequent
2183 iterations use the value loaded by the compare-and-swap pattern. */
2185 void
2186 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2187 rtx old_dst, rtx new_dst)
2189 enum machine_mode mode = GET_MODE (mem);
2190 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2191 enum insn_code icode;
2193 /* Special case for using fetchadd. */
2194 if ((mode == SImode || mode == DImode)
2195 && (code == PLUS || code == MINUS)
2196 && fetchadd_operand (val, mode))
2198 if (code == MINUS)
2199 val = GEN_INT (-INTVAL (val));
2201 if (!old_dst)
2202 old_dst = gen_reg_rtx (mode);
2204 emit_insn (gen_memory_barrier ());
2206 if (mode == SImode)
2207 icode = CODE_FOR_fetchadd_acq_si;
2208 else
2209 icode = CODE_FOR_fetchadd_acq_di;
2210 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2212 if (new_dst)
2214 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2215 true, OPTAB_WIDEN);
2216 if (new_reg != new_dst)
2217 emit_move_insn (new_dst, new_reg);
2219 return;
2222 /* Because of the volatile mem read, we get an ld.acq, which is the
2223 front half of the full barrier. The end half is the cmpxchg.rel. */
2224 gcc_assert (MEM_VOLATILE_P (mem));
2226 old_reg = gen_reg_rtx (DImode);
2227 cmp_reg = gen_reg_rtx (DImode);
2228 label = gen_label_rtx ();
2230 if (mode != DImode)
2232 val = simplify_gen_subreg (DImode, val, mode, 0);
2233 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2235 else
2236 emit_move_insn (cmp_reg, mem);
2238 emit_label (label);
2240 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2241 emit_move_insn (old_reg, cmp_reg);
2242 emit_move_insn (ar_ccv, cmp_reg);
2244 if (old_dst)
2245 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2247 new_reg = cmp_reg;
2248 if (code == NOT)
2250 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2251 true, OPTAB_DIRECT);
2252 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2254 else
2255 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2256 true, OPTAB_DIRECT);
2258 if (mode != DImode)
2259 new_reg = gen_lowpart (mode, new_reg);
2260 if (new_dst)
2261 emit_move_insn (new_dst, new_reg);
2263 switch (mode)
2265 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2266 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2267 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2268 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2269 default:
2270 gcc_unreachable ();
2273 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2275 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
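/* Illustrative sketch, not part of the original source: the loop documented
   above, written with GCC's __sync builtins instead of the ia64 expanders.
   The fetchadd fast path corresponds to __sync_fetch_and_add; every other
   operation falls back to a compare-and-swap loop that performs the plain
   load only once and afterwards reuses the value returned by the CAS.  */
static unsigned long
example_atomic_ior (volatile unsigned long *mem, unsigned long val)
{
  unsigned long cmp = *mem;                  /* the single plain load */

  for (;;)
    {
      unsigned long old_val = cmp;
      unsigned long new_val = old_val | val; /* MEM <CODE>= VAL, CODE == IOR */

      cmp = __sync_val_compare_and_swap (mem, old_val, new_val);
      if (cmp == old_val)
	return old_val;                      /* what OLD_DST receives */
    }
}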
2278 /* Begin the assembly file. */
2280 static void
2281 ia64_file_start (void)
2283 /* Variable tracking should be run after all optimizations which change order
2284 of insns. It also needs a valid CFG. This can't be done in
2285 ia64_override_options, because flag_var_tracking is finalized after
2286 that. */
2287 ia64_flag_var_tracking = flag_var_tracking;
2288 flag_var_tracking = 0;
2290 default_file_start ();
2291 emit_safe_across_calls ();
2294 void
2295 emit_safe_across_calls (void)
2297 unsigned int rs, re;
2298 int out_state;
2300 rs = 1;
2301 out_state = 0;
2302 while (1)
2304 while (rs < 64 && call_used_regs[PR_REG (rs)])
2305 rs++;
2306 if (rs >= 64)
2307 break;
2308 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2309 continue;
2310 if (out_state == 0)
2312 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2313 out_state = 1;
2315 else
2316 fputc (',', asm_out_file);
2317 if (re == rs + 1)
2318 fprintf (asm_out_file, "p%u", rs);
2319 else
2320 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2321 rs = re + 1;
2323 if (out_state)
2324 fputc ('\n', asm_out_file);
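/* Illustrative sketch, not part of the original source: the range-printing
   logic above in a self-contained form, writing to stdout instead of
   asm_out_file.  CALL_USED stands in for call_used_regs[PR_REG (n)].  For
   example, if only p6-p8 and p20 are call-saved, this prints
   "\t.pred.safe_across_calls p6-p8,p20\n".  */
#include <stdio.h>

static void
example_emit_pred_ranges (const int call_used[64])
{
  unsigned int rs = 1, re;
  int out_state = 0;

  while (1)
    {
      while (rs < 64 && call_used[rs])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && !call_used[re]; re++)
	continue;
      fputs (out_state ? "," : "\t.pred.safe_across_calls ", stdout);
      out_state = 1;
      if (re == rs + 1)
	printf ("p%u", rs);
      else
	printf ("p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', stdout);
}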
2327 /* Globalize a declaration. */
2329 static void
2330 ia64_globalize_decl_name (FILE * stream, tree decl)
2332 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2333 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2334 if (version_attr)
2336 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2337 const char *p = TREE_STRING_POINTER (v);
2338 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2340 targetm.asm_out.globalize_label (stream, name);
2341 if (TREE_CODE (decl) == FUNCTION_DECL)
2342 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2345 /* Helper function for ia64_compute_frame_size: find an appropriate general
2346 register to spill the special register R to. The bits for GR0 to GR31
2347 already allocated by this routine are recorded in current_frame_info.gr_used_mask.
2348 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2350 static int
2351 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2353 int regno;
2355 if (emitted_frame_related_regs[r] != 0)
2357 regno = emitted_frame_related_regs[r];
2358 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2359 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2360 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2361 else if (current_function_is_leaf
2362 && regno >= GR_REG (1) && regno <= GR_REG (31))
2363 current_frame_info.gr_used_mask |= 1 << regno;
2365 return regno;
2368 /* If this is a leaf function, first try an otherwise unused
2369 call-clobbered register. */
2370 if (current_function_is_leaf)
2372 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2373 if (! df_regs_ever_live_p (regno)
2374 && call_used_regs[regno]
2375 && ! fixed_regs[regno]
2376 && ! global_regs[regno]
2377 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2378 && ! is_emitted (regno))
2380 current_frame_info.gr_used_mask |= 1 << regno;
2381 return regno;
2385 if (try_locals)
2387 regno = current_frame_info.n_local_regs;
2388 /* If there is a frame pointer, then we can't use loc79, because
2389 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2390 reg_name switching code in ia64_expand_prologue. */
2391 while (regno < (80 - frame_pointer_needed))
2392 if (! is_emitted (LOC_REG (regno++)))
2394 current_frame_info.n_local_regs = regno;
2395 return LOC_REG (regno - 1);
2399 /* Failed to find a general register to spill to. Must use stack. */
2400 return 0;
2403 /* In order to make for nice schedules, we try to allocate every temporary
2404 to a different register. We must of course stay away from call-saved,
2405 fixed, and global registers. We must also stay away from registers
2406 allocated in current_frame_info.gr_used_mask, since those include regs
2407 used all through the prologue.
2409 Any register allocated here must be used immediately. The idea is to
2410 aid scheduling, not to solve data flow problems. */
2412 static int last_scratch_gr_reg;
2414 static int
2415 next_scratch_gr_reg (void)
2417 int i, regno;
2419 for (i = 0; i < 32; ++i)
2421 regno = (last_scratch_gr_reg + i + 1) & 31;
2422 if (call_used_regs[regno]
2423 && ! fixed_regs[regno]
2424 && ! global_regs[regno]
2425 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2427 last_scratch_gr_reg = regno;
2428 return regno;
2432 /* There must be _something_ available. */
2433 gcc_unreachable ();
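/* Illustrative sketch, not part of the original source: the round-robin walk
   above in a self-contained form.  USED stands in for the combined
   call_used/fixed/global/gr_used_mask tests, and the static variable plays
   the role of last_scratch_gr_reg.  Starting just past the previous pick is
   what spreads consecutive temporaries across different registers.  */
static int example_last_scratch;

static int
example_next_scratch (const int used[32])
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (example_last_scratch + i + 1) & 31;
      if (!used[regno])
	{
	  example_last_scratch = regno;
	  return regno;
	}
    }
  return -1;   /* the real routine asserts that this cannot happen */
}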
2436 /* Helper function for ia64_compute_frame_size, called through
2437 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2439 static void
2440 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2442 unsigned int regno = REGNO (reg);
2443 if (regno < 32)
2445 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2446 for (i = 0; i < n; ++i)
2447 current_frame_info.gr_used_mask |= 1 << (regno + i);
2452 /* Compute the frame layout for the current function and record it in
2453 current_frame_info. SIZE is the number of bytes of space needed for
2454 local variables. */
2456 static void
2457 ia64_compute_frame_size (HOST_WIDE_INT size)
2459 HOST_WIDE_INT total_size;
2460 HOST_WIDE_INT spill_size = 0;
2461 HOST_WIDE_INT extra_spill_size = 0;
2462 HOST_WIDE_INT pretend_args_size;
2463 HARD_REG_SET mask;
2464 int n_spilled = 0;
2465 int spilled_gr_p = 0;
2466 int spilled_fr_p = 0;
2467 unsigned int regno;
2468 int min_regno;
2469 int max_regno;
2470 int i;
2472 if (current_frame_info.initialized)
2473 return;
2475 memset (&current_frame_info, 0, sizeof current_frame_info);
2476 CLEAR_HARD_REG_SET (mask);
2478 /* Don't allocate scratches to the return register. */
2479 diddle_return_value (mark_reg_gr_used_mask, NULL);
2481 /* Don't allocate scratches to the EH scratch registers. */
2482 if (cfun->machine->ia64_eh_epilogue_sp)
2483 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2484 if (cfun->machine->ia64_eh_epilogue_bsp)
2485 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2487 /* Find the size of the register stack frame. We have only 80 local
2488 registers, because we reserve 8 for the inputs and 8 for the
2489 outputs. */
2491 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2492 since we'll be adjusting that down later. */
2493 regno = LOC_REG (78) + ! frame_pointer_needed;
2494 for (; regno >= LOC_REG (0); regno--)
2495 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2496 break;
2497 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2499 /* For functions marked with the syscall_linkage attribute, we must mark
2500 all eight input registers as in use, so that locals aren't visible to
2501 the caller. */
2503 if (cfun->machine->n_varargs > 0
2504 || lookup_attribute ("syscall_linkage",
2505 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2506 current_frame_info.n_input_regs = 8;
2507 else
2509 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2510 if (df_regs_ever_live_p (regno))
2511 break;
2512 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2515 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2516 if (df_regs_ever_live_p (regno))
2517 break;
2518 i = regno - OUT_REG (0) + 1;
2520 #ifndef PROFILE_HOOK
2521 /* When -p profiling, we need one output register for the mcount argument.
2522 Likewise for -a profiling for the bb_init_func argument. For -ax
2523 profiling, we need two output registers for the two bb_init_trace_func
2524 arguments. */
2525 if (crtl->profile)
2526 i = MAX (i, 1);
2527 #endif
2528 current_frame_info.n_output_regs = i;
2530 /* ??? No rotating register support yet. */
2531 current_frame_info.n_rotate_regs = 0;
2533 /* Discover which registers need spilling, and how much room that
2534 will take. Begin with floating point and general registers,
2535 which will always wind up on the stack. */
2537 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2538 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2540 SET_HARD_REG_BIT (mask, regno);
2541 spill_size += 16;
2542 n_spilled += 1;
2543 spilled_fr_p = 1;
2546 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2547 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2549 SET_HARD_REG_BIT (mask, regno);
2550 spill_size += 8;
2551 n_spilled += 1;
2552 spilled_gr_p = 1;
2555 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2556 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2558 SET_HARD_REG_BIT (mask, regno);
2559 spill_size += 8;
2560 n_spilled += 1;
2563 /* Now come all special registers that might get saved in other
2564 general registers. */
2566 if (frame_pointer_needed)
2568 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2569 /* If we did not get a register, then we take LOC79. This is guaranteed
2570 to be free, even if regs_ever_live is already set, because this is
2571 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2572 as we don't count loc79 above. */
2573 if (current_frame_info.r[reg_fp] == 0)
2575 current_frame_info.r[reg_fp] = LOC_REG (79);
2576 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2580 if (! current_function_is_leaf)
2582 /* Emit a save of BR0 if we call other functions. Do this even
2583 if this function doesn't return, as EH depends on this to be
2584 able to unwind the stack. */
2585 SET_HARD_REG_BIT (mask, BR_REG (0));
2587 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2588 if (current_frame_info.r[reg_save_b0] == 0)
2590 extra_spill_size += 8;
2591 n_spilled += 1;
2594 /* Similarly for ar.pfs. */
2595 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2596 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2597 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2599 extra_spill_size += 8;
2600 n_spilled += 1;
2603 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2604 registers are clobbered, so we fall back to the stack. */
2605 current_frame_info.r[reg_save_gp]
2606 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2607 if (current_frame_info.r[reg_save_gp] == 0)
2609 SET_HARD_REG_BIT (mask, GR_REG (1));
2610 spill_size += 8;
2611 n_spilled += 1;
2614 else
2616 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2618 SET_HARD_REG_BIT (mask, BR_REG (0));
2619 extra_spill_size += 8;
2620 n_spilled += 1;
2623 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2625 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2626 current_frame_info.r[reg_save_ar_pfs]
2627 = find_gr_spill (reg_save_ar_pfs, 1);
2628 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2630 extra_spill_size += 8;
2631 n_spilled += 1;
2636 /* Unwind descriptor hackery: things are most efficient if we allocate
2637 consecutive GR save registers for RP, PFS, FP in that order. However,
2638 it is absolutely critical that FP get the only hard register that's
2639 guaranteed to be free, so we allocated it first. If all three did
2640 happen to be allocated hard regs, and are consecutive, rearrange them
2641 into the preferred order now.
2643 If we have already emitted code for any of those registers,
2644 then it's already too late to change. */
2645 min_regno = MIN (current_frame_info.r[reg_fp],
2646 MIN (current_frame_info.r[reg_save_b0],
2647 current_frame_info.r[reg_save_ar_pfs]));
2648 max_regno = MAX (current_frame_info.r[reg_fp],
2649 MAX (current_frame_info.r[reg_save_b0],
2650 current_frame_info.r[reg_save_ar_pfs]));
2651 if (min_regno > 0
2652 && min_regno + 2 == max_regno
2653 && (current_frame_info.r[reg_fp] == min_regno + 1
2654 || current_frame_info.r[reg_save_b0] == min_regno + 1
2655 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2656 && (emitted_frame_related_regs[reg_save_b0] == 0
2657 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2658 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2659 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2660 && (emitted_frame_related_regs[reg_fp] == 0
2661 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2663 current_frame_info.r[reg_save_b0] = min_regno;
2664 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2665 current_frame_info.r[reg_fp] = min_regno + 2;
2668 /* See if we need to store the predicate register block. */
2669 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2670 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2671 break;
2672 if (regno <= PR_REG (63))
2674 SET_HARD_REG_BIT (mask, PR_REG (0));
2675 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2676 if (current_frame_info.r[reg_save_pr] == 0)
2678 extra_spill_size += 8;
2679 n_spilled += 1;
2682 /* ??? Mark them all as used so that register renaming and such
2683 are free to use them. */
2684 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2685 df_set_regs_ever_live (regno, true);
2688 /* If we're forced to use st8.spill, we're forced to save and restore
2689 ar.unat as well. The check for existing liveness allows inline asm
2690 to touch ar.unat. */
2691 if (spilled_gr_p || cfun->machine->n_varargs
2692 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2694 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2695 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2696 current_frame_info.r[reg_save_ar_unat]
2697 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2698 if (current_frame_info.r[reg_save_ar_unat] == 0)
2700 extra_spill_size += 8;
2701 n_spilled += 1;
2705 if (df_regs_ever_live_p (AR_LC_REGNUM))
2707 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2708 current_frame_info.r[reg_save_ar_lc]
2709 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2710 if (current_frame_info.r[reg_save_ar_lc] == 0)
2712 extra_spill_size += 8;
2713 n_spilled += 1;
2717 /* If we have an odd number of words of pretend arguments written to
2718 the stack, then the FR save area will be unaligned. We round the
2719 size of this area up to keep things 16 byte aligned. */
2720 if (spilled_fr_p)
2721 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2722 else
2723 pretend_args_size = crtl->args.pretend_args_size;
2725 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2726 + crtl->outgoing_args_size);
2727 total_size = IA64_STACK_ALIGN (total_size);
2729 /* We always use the 16-byte scratch area provided by the caller, but
2730 if we are a leaf function, there's no one to which we need to provide
2731 a scratch area. */
2732 if (current_function_is_leaf)
2733 total_size = MAX (0, total_size - 16);
2735 current_frame_info.total_size = total_size;
2736 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2737 current_frame_info.spill_size = spill_size;
2738 current_frame_info.extra_spill_size = extra_spill_size;
2739 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2740 current_frame_info.n_spilled = n_spilled;
2741 current_frame_info.initialized = reload_completed;
2744 /* Worker function for TARGET_CAN_ELIMINATE. */
2746 bool
2747 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2749 return (to == BR_REG (0) ? current_function_is_leaf : true);
2752 /* Compute the initial difference between the specified pair of registers. */
2754 HOST_WIDE_INT
2755 ia64_initial_elimination_offset (int from, int to)
2757 HOST_WIDE_INT offset;
2759 ia64_compute_frame_size (get_frame_size ());
2760 switch (from)
2762 case FRAME_POINTER_REGNUM:
2763 switch (to)
2765 case HARD_FRAME_POINTER_REGNUM:
2766 if (current_function_is_leaf)
2767 offset = -current_frame_info.total_size;
2768 else
2769 offset = -(current_frame_info.total_size
2770 - crtl->outgoing_args_size - 16);
2771 break;
2773 case STACK_POINTER_REGNUM:
2774 if (current_function_is_leaf)
2775 offset = 0;
2776 else
2777 offset = 16 + crtl->outgoing_args_size;
2778 break;
2780 default:
2781 gcc_unreachable ();
2783 break;
2785 case ARG_POINTER_REGNUM:
2786 /* Arguments start above the 16 byte save area, unless stdarg
2787 in which case we store through the 16 byte save area. */
2788 switch (to)
2790 case HARD_FRAME_POINTER_REGNUM:
2791 offset = 16 - crtl->args.pretend_args_size;
2792 break;
2794 case STACK_POINTER_REGNUM:
2795 offset = (current_frame_info.total_size
2796 + 16 - crtl->args.pretend_args_size);
2797 break;
2799 default:
2800 gcc_unreachable ();
2802 break;
2804 default:
2805 gcc_unreachable ();
2808 return offset;
2811 /* If there are more than a trivial number of register spills, we use
2812 two interleaved iterators so that we can get two memory references
2813 per insn group.
2815 In order to simplify things in the prologue and epilogue expanders,
2816 we use helper functions to fix up the memory references after the
2817 fact with the appropriate offsets to a POST_MODIFY memory mode.
2818 The following data structure tracks the state of the two iterators
2819 while insns are being emitted. */
2821 struct spill_fill_data
2823 rtx init_after; /* point at which to emit initializations */
2824 rtx init_reg[2]; /* initial base register */
2825 rtx iter_reg[2]; /* the iterator registers */
2826 rtx *prev_addr[2]; /* address of last memory use */
2827 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2828 HOST_WIDE_INT prev_off[2]; /* last offset */
2829 int n_iter; /* number of iterators in use */
2830 int next_iter; /* next iterator to use */
2831 unsigned int save_gr_used_mask;
2834 static struct spill_fill_data spill_fill_data;
2836 static void
2837 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2839 int i;
2841 spill_fill_data.init_after = get_last_insn ();
2842 spill_fill_data.init_reg[0] = init_reg;
2843 spill_fill_data.init_reg[1] = init_reg;
2844 spill_fill_data.prev_addr[0] = NULL;
2845 spill_fill_data.prev_addr[1] = NULL;
2846 spill_fill_data.prev_insn[0] = NULL;
2847 spill_fill_data.prev_insn[1] = NULL;
2848 spill_fill_data.prev_off[0] = cfa_off;
2849 spill_fill_data.prev_off[1] = cfa_off;
2850 spill_fill_data.next_iter = 0;
2851 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2853 spill_fill_data.n_iter = 1 + (n_spills > 2);
2854 for (i = 0; i < spill_fill_data.n_iter; ++i)
2856 int regno = next_scratch_gr_reg ();
2857 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2858 current_frame_info.gr_used_mask |= 1 << regno;
2862 static void
2863 finish_spill_pointers (void)
2865 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2868 static rtx
2869 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2871 int iter = spill_fill_data.next_iter;
2872 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2873 rtx disp_rtx = GEN_INT (disp);
2874 rtx mem;
2876 if (spill_fill_data.prev_addr[iter])
2878 if (satisfies_constraint_N (disp_rtx))
2880 *spill_fill_data.prev_addr[iter]
2881 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2882 gen_rtx_PLUS (DImode,
2883 spill_fill_data.iter_reg[iter],
2884 disp_rtx));
2885 add_reg_note (spill_fill_data.prev_insn[iter],
2886 REG_INC, spill_fill_data.iter_reg[iter]);
2888 else
2890 /* ??? Could use register post_modify for loads. */
2891 if (!satisfies_constraint_I (disp_rtx))
2893 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2894 emit_move_insn (tmp, disp_rtx);
2895 disp_rtx = tmp;
2897 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2898 spill_fill_data.iter_reg[iter], disp_rtx));
2901 /* Micro-optimization: if we've created a frame pointer, it's at
2902 CFA 0, which may allow the real iterator to be initialized lower,
2903 slightly increasing parallelism. Also, if there are few saves
2904 it may eliminate the iterator entirely. */
2905 else if (disp == 0
2906 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2907 && frame_pointer_needed)
2909 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2910 set_mem_alias_set (mem, get_varargs_alias_set ());
2911 return mem;
2913 else
2915 rtx seq, insn;
2917 if (disp == 0)
2918 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2919 spill_fill_data.init_reg[iter]);
2920 else
2922 start_sequence ();
2924 if (!satisfies_constraint_I (disp_rtx))
2926 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2927 emit_move_insn (tmp, disp_rtx);
2928 disp_rtx = tmp;
2931 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2932 spill_fill_data.init_reg[iter],
2933 disp_rtx));
2935 seq = get_insns ();
2936 end_sequence ();
2939 /* Careful for being the first insn in a sequence. */
2940 if (spill_fill_data.init_after)
2941 insn = emit_insn_after (seq, spill_fill_data.init_after);
2942 else
2944 rtx first = get_insns ();
2945 if (first)
2946 insn = emit_insn_before (seq, first);
2947 else
2948 insn = emit_insn (seq);
2950 spill_fill_data.init_after = insn;
2953 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2955 /* ??? Not all of the spills are for varargs, but some of them are.
2956 The rest of the spills belong in an alias set of their own. But
2957 it doesn't actually hurt to include them here. */
2958 set_mem_alias_set (mem, get_varargs_alias_set ());
2960 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2961 spill_fill_data.prev_off[iter] = cfa_off;
2963 if (++iter >= spill_fill_data.n_iter)
2964 iter = 0;
2965 spill_fill_data.next_iter = iter;
2967 return mem;
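/* Illustrative sketch, not part of the original source: the net effect of
   the two interleaved iterators above, drawn with plain pointers.
   Consecutive 8-byte save slots alternate between two base pointers, and
   each pointer post-increments past the slot owned by the other, so two
   independent memory references are available per insn group.  Names are
   made up.  */
static void
example_interleaved_spill (unsigned long *area, const unsigned long *vals, int n)
{
  unsigned long *iter[2];
  int i;

  iter[0] = area;        /* will write slots 0, 2, 4, ...  */
  iter[1] = area + 1;    /* will write slots 1, 3, 5, ...  */

  for (i = 0; i < n; i++)
    {
      *iter[i & 1] = vals[i];
      iter[i & 1] += 2;  /* the "post-modify by 16" step */
    }
}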
2970 static void
2971 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2972 rtx frame_reg)
2974 int iter = spill_fill_data.next_iter;
2975 rtx mem, insn;
2977 mem = spill_restore_mem (reg, cfa_off);
2978 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2979 spill_fill_data.prev_insn[iter] = insn;
2981 if (frame_reg)
2983 rtx base;
2984 HOST_WIDE_INT off;
2986 RTX_FRAME_RELATED_P (insn) = 1;
2988 /* Don't even pretend that the unwind code can intuit its way
2989 through a pair of interleaved post_modify iterators. Just
2990 provide the correct answer. */
2992 if (frame_pointer_needed)
2994 base = hard_frame_pointer_rtx;
2995 off = - cfa_off;
2997 else
2999 base = stack_pointer_rtx;
3000 off = current_frame_info.total_size - cfa_off;
3003 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3004 gen_rtx_SET (VOIDmode,
3005 gen_rtx_MEM (GET_MODE (reg),
3006 plus_constant (base, off)),
3007 frame_reg));
3011 static void
3012 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3014 int iter = spill_fill_data.next_iter;
3015 rtx insn;
3017 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3018 GEN_INT (cfa_off)));
3019 spill_fill_data.prev_insn[iter] = insn;
3022 /* Wrapper functions that discard the CONST_INT spill offset. These
3023 exist so that we can give gr_spill/gr_fill the offset they need and
3024 use a consistent function interface. */
3026 static rtx
3027 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3029 return gen_movdi (dest, src);
3032 static rtx
3033 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3035 return gen_fr_spill (dest, src);
3038 static rtx
3039 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3041 return gen_fr_restore (dest, src);
3044 /* Called after register allocation to add any instructions needed for the
3045 prologue. Using a prologue insn is favored compared to putting all of the
3046 instructions in output_function_prologue(), since it allows the scheduler
3047 to intermix instructions with the saves of the caller saved registers. In
3048 some cases, it might be necessary to emit a barrier instruction as the last
3049 insn to prevent such scheduling.
3051 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3052 so that the debug info generation code can handle them properly.
3054 The register save area is laid out like so:
3055 cfa+16
3056 [ varargs spill area ]
3057 [ fr register spill area ]
3058 [ br register spill area ]
3059 [ ar register spill area ]
3060 [ pr register spill area ]
3061 [ gr register spill area ] */
3063 /* ??? Get inefficient code when the frame size is larger than can fit in an
3064 adds instruction. */
3066 void
3067 ia64_expand_prologue (void)
3069 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3070 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3071 rtx reg, alt_reg;
3073 ia64_compute_frame_size (get_frame_size ());
3074 last_scratch_gr_reg = 15;
3076 if (dump_file)
3078 fprintf (dump_file, "ia64 frame related registers "
3079 "recorded in current_frame_info.r[]:\n");
3080 #define PRINTREG(a) if (current_frame_info.r[a]) \
3081 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3082 PRINTREG(reg_fp);
3083 PRINTREG(reg_save_b0);
3084 PRINTREG(reg_save_pr);
3085 PRINTREG(reg_save_ar_pfs);
3086 PRINTREG(reg_save_ar_unat);
3087 PRINTREG(reg_save_ar_lc);
3088 PRINTREG(reg_save_gp);
3089 #undef PRINTREG
3092 /* If there is no epilogue, then we don't need some prologue insns.
3093 We need to avoid emitting the dead prologue insns, because flow
3094 will complain about them. */
3095 if (optimize)
3097 edge e;
3098 edge_iterator ei;
3100 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3101 if ((e->flags & EDGE_FAKE) == 0
3102 && (e->flags & EDGE_FALLTHRU) != 0)
3103 break;
3104 epilogue_p = (e != NULL);
3106 else
3107 epilogue_p = 1;
3109 /* Set the local, input, and output register names. We need to do this
3110 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3111 half. If we use in/loc/out register names, then we get assembler errors
3112 in crtn.S because there is no alloc insn or regstk directive in there. */
3113 if (! TARGET_REG_NAMES)
3115 int inputs = current_frame_info.n_input_regs;
3116 int locals = current_frame_info.n_local_regs;
3117 int outputs = current_frame_info.n_output_regs;
3119 for (i = 0; i < inputs; i++)
3120 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3121 for (i = 0; i < locals; i++)
3122 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3123 for (i = 0; i < outputs; i++)
3124 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3127 /* Set the frame pointer register name. The regnum is logically loc79,
3128 but of course we'll not have allocated that many locals. Rather than
3129 worrying about renumbering the existing rtxs, we adjust the name. */
3130 /* ??? This code means that we can never use one local register when
3131 there is a frame pointer. loc79 gets wasted in this case, as it is
3132 renamed to a register that will never be used. See also the try_locals
3133 code in find_gr_spill. */
3134 if (current_frame_info.r[reg_fp])
3136 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3137 reg_names[HARD_FRAME_POINTER_REGNUM]
3138 = reg_names[current_frame_info.r[reg_fp]];
3139 reg_names[current_frame_info.r[reg_fp]] = tmp;
3142 /* We don't need an alloc instruction if we've used no outputs or locals. */
3143 if (current_frame_info.n_local_regs == 0
3144 && current_frame_info.n_output_regs == 0
3145 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3146 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3148 /* If there is no alloc, but there are input registers used, then we
3149 need a .regstk directive. */
3150 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3151 ar_pfs_save_reg = NULL_RTX;
3153 else
3155 current_frame_info.need_regstk = 0;
3157 if (current_frame_info.r[reg_save_ar_pfs])
3159 regno = current_frame_info.r[reg_save_ar_pfs];
3160 reg_emitted (reg_save_ar_pfs);
3162 else
3163 regno = next_scratch_gr_reg ();
3164 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3166 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3167 GEN_INT (current_frame_info.n_input_regs),
3168 GEN_INT (current_frame_info.n_local_regs),
3169 GEN_INT (current_frame_info.n_output_regs),
3170 GEN_INT (current_frame_info.n_rotate_regs)));
3171 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3174 /* Set up frame pointer, stack pointer, and spill iterators. */
3176 n_varargs = cfun->machine->n_varargs;
3177 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3178 stack_pointer_rtx, 0);
3180 if (frame_pointer_needed)
3182 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3183 RTX_FRAME_RELATED_P (insn) = 1;
3186 if (current_frame_info.total_size != 0)
3188 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3189 rtx offset;
3191 if (satisfies_constraint_I (frame_size_rtx))
3192 offset = frame_size_rtx;
3193 else
3195 regno = next_scratch_gr_reg ();
3196 offset = gen_rtx_REG (DImode, regno);
3197 emit_move_insn (offset, frame_size_rtx);
3200 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3201 stack_pointer_rtx, offset));
3203 if (! frame_pointer_needed)
3205 RTX_FRAME_RELATED_P (insn) = 1;
3206 if (GET_CODE (offset) != CONST_INT)
3207 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3208 gen_rtx_SET (VOIDmode,
3209 stack_pointer_rtx,
3210 gen_rtx_PLUS (DImode,
3211 stack_pointer_rtx,
3212 frame_size_rtx)));
3215 /* ??? At this point we must generate a magic insn that appears to
3216 modify the stack pointer, the frame pointer, and all spill
3217 iterators. This would allow the most scheduling freedom. For
3218 now, just hard stop. */
3219 emit_insn (gen_blockage ());
3222 /* Must copy out ar.unat before doing any integer spills. */
3223 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3225 if (current_frame_info.r[reg_save_ar_unat])
3227 ar_unat_save_reg
3228 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3229 reg_emitted (reg_save_ar_unat);
3231 else
3233 alt_regno = next_scratch_gr_reg ();
3234 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3235 current_frame_info.gr_used_mask |= 1 << alt_regno;
3238 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3239 insn = emit_move_insn (ar_unat_save_reg, reg);
3240 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3242 /* Even if we're not going to generate an epilogue, we still
3243 need to save the register so that EH works. */
3244 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3245 emit_insn (gen_prologue_use (ar_unat_save_reg));
3247 else
3248 ar_unat_save_reg = NULL_RTX;
3250 /* Spill all varargs registers. Do this before spilling any GR registers,
3251 since we want the UNAT bits for the GR registers to override the UNAT
3252 bits from varargs, which we don't care about. */
3254 cfa_off = -16;
3255 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3257 reg = gen_rtx_REG (DImode, regno);
3258 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3261 /* Locate the bottom of the register save area. */
3262 cfa_off = (current_frame_info.spill_cfa_off
3263 + current_frame_info.spill_size
3264 + current_frame_info.extra_spill_size);
3266 /* Save the predicate register block either in a register or in memory. */
3267 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3269 reg = gen_rtx_REG (DImode, PR_REG (0));
3270 if (current_frame_info.r[reg_save_pr] != 0)
3272 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3273 reg_emitted (reg_save_pr);
3274 insn = emit_move_insn (alt_reg, reg);
3276 /* ??? Denote pr spill/fill by a DImode move that modifies all
3277 64 hard registers. */
3278 RTX_FRAME_RELATED_P (insn) = 1;
3279 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3280 gen_rtx_SET (VOIDmode, alt_reg, reg));
3282 /* Even if we're not going to generate an epilogue, we still
3283 need to save the register so that EH works. */
3284 if (! epilogue_p)
3285 emit_insn (gen_prologue_use (alt_reg));
3287 else
3289 alt_regno = next_scratch_gr_reg ();
3290 alt_reg = gen_rtx_REG (DImode, alt_regno);
3291 insn = emit_move_insn (alt_reg, reg);
3292 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3293 cfa_off -= 8;
3297 /* Handle AR regs in numerical order. All of them get special handling. */
3298 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3299 && current_frame_info.r[reg_save_ar_unat] == 0)
3301 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3302 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3303 cfa_off -= 8;
3306 /* The alloc insn already copied ar.pfs into a general register. The
3307 only thing we have to do now is copy that register to a stack slot
3308 if we'd not allocated a local register for the job. */
3309 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3310 && current_frame_info.r[reg_save_ar_pfs] == 0)
3312 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3313 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3314 cfa_off -= 8;
3317 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3319 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3320 if (current_frame_info.r[reg_save_ar_lc] != 0)
3322 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3323 reg_emitted (reg_save_ar_lc);
3324 insn = emit_move_insn (alt_reg, reg);
3325 RTX_FRAME_RELATED_P (insn) = 1;
3327 /* Even if we're not going to generate an epilogue, we still
3328 need to save the register so that EH works. */
3329 if (! epilogue_p)
3330 emit_insn (gen_prologue_use (alt_reg));
3332 else
3334 alt_regno = next_scratch_gr_reg ();
3335 alt_reg = gen_rtx_REG (DImode, alt_regno);
3336 emit_move_insn (alt_reg, reg);
3337 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3338 cfa_off -= 8;
3342 /* Save the return pointer. */
3343 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3345 reg = gen_rtx_REG (DImode, BR_REG (0));
3346 if (current_frame_info.r[reg_save_b0] != 0)
3348 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3349 reg_emitted (reg_save_b0);
3350 insn = emit_move_insn (alt_reg, reg);
3351 RTX_FRAME_RELATED_P (insn) = 1;
3353 /* Even if we're not going to generate an epilogue, we still
3354 need to save the register so that EH works. */
3355 if (! epilogue_p)
3356 emit_insn (gen_prologue_use (alt_reg));
3358 else
3360 alt_regno = next_scratch_gr_reg ();
3361 alt_reg = gen_rtx_REG (DImode, alt_regno);
3362 emit_move_insn (alt_reg, reg);
3363 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3364 cfa_off -= 8;
3368 if (current_frame_info.r[reg_save_gp])
3370 reg_emitted (reg_save_gp);
3371 insn = emit_move_insn (gen_rtx_REG (DImode,
3372 current_frame_info.r[reg_save_gp]),
3373 pic_offset_table_rtx);
3376 /* We should now be at the base of the gr/br/fr spill area. */
3377 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3378 + current_frame_info.spill_size));
3380 /* Spill all general registers. */
3381 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3382 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3384 reg = gen_rtx_REG (DImode, regno);
3385 do_spill (gen_gr_spill, reg, cfa_off, reg);
3386 cfa_off -= 8;
3389 /* Spill the rest of the BR registers. */
3390 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3391 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3393 alt_regno = next_scratch_gr_reg ();
3394 alt_reg = gen_rtx_REG (DImode, alt_regno);
3395 reg = gen_rtx_REG (DImode, regno);
3396 emit_move_insn (alt_reg, reg);
3397 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3398 cfa_off -= 8;
3401 /* Align the frame and spill all FR registers. */
3402 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3403 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3405 gcc_assert (!(cfa_off & 15));
3406 reg = gen_rtx_REG (XFmode, regno);
3407 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3408 cfa_off -= 16;
3411 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3413 finish_spill_pointers ();
3416 /* Called after register allocation to add any instructions needed for the
3417 epilogue. Using an epilogue insn is favored compared to putting all of the
3418 instructions in output_function_epilogue(), since it allows the scheduler
3419 to intermix instructions with the saves of the caller saved registers. In
3420 some cases, it might be necessary to emit a barrier instruction as the last
3421 insn to prevent such scheduling. */
3423 void
3424 ia64_expand_epilogue (int sibcall_p)
3426 rtx insn, reg, alt_reg, ar_unat_save_reg;
3427 int regno, alt_regno, cfa_off;
3429 ia64_compute_frame_size (get_frame_size ());
3431 /* If there is a frame pointer, then we use it instead of the stack
3432 pointer, so that the stack pointer does not need to be valid when
3433 the epilogue starts. See EXIT_IGNORE_STACK. */
3434 if (frame_pointer_needed)
3435 setup_spill_pointers (current_frame_info.n_spilled,
3436 hard_frame_pointer_rtx, 0);
3437 else
3438 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3439 current_frame_info.total_size);
3441 if (current_frame_info.total_size != 0)
3443 /* ??? At this point we must generate a magic insn that appears to
3444 modify the spill iterators and the frame pointer. This would
3445 allow the most scheduling freedom. For now, just hard stop. */
3446 emit_insn (gen_blockage ());
3449 /* Locate the bottom of the register save area. */
3450 cfa_off = (current_frame_info.spill_cfa_off
3451 + current_frame_info.spill_size
3452 + current_frame_info.extra_spill_size);
3454 /* Restore the predicate registers. */
3455 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3457 if (current_frame_info.r[reg_save_pr] != 0)
3459 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3460 reg_emitted (reg_save_pr);
3462 else
3464 alt_regno = next_scratch_gr_reg ();
3465 alt_reg = gen_rtx_REG (DImode, alt_regno);
3466 do_restore (gen_movdi_x, alt_reg, cfa_off);
3467 cfa_off -= 8;
3469 reg = gen_rtx_REG (DImode, PR_REG (0));
3470 emit_move_insn (reg, alt_reg);
3473 /* Restore the application registers. */
3475 /* Load the saved unat from the stack, but do not restore it until
3476 after the GRs have been restored. */
3477 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3479 if (current_frame_info.r[reg_save_ar_unat] != 0)
3481 ar_unat_save_reg
3482 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3483 reg_emitted (reg_save_ar_unat);
3485 else
3487 alt_regno = next_scratch_gr_reg ();
3488 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3489 current_frame_info.gr_used_mask |= 1 << alt_regno;
3490 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3491 cfa_off -= 8;
3494 else
3495 ar_unat_save_reg = NULL_RTX;
3497 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3499 reg_emitted (reg_save_ar_pfs);
3500 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3501 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3502 emit_move_insn (reg, alt_reg);
3504 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3506 alt_regno = next_scratch_gr_reg ();
3507 alt_reg = gen_rtx_REG (DImode, alt_regno);
3508 do_restore (gen_movdi_x, alt_reg, cfa_off);
3509 cfa_off -= 8;
3510 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3511 emit_move_insn (reg, alt_reg);
3514 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3516 if (current_frame_info.r[reg_save_ar_lc] != 0)
3518 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3519 reg_emitted (reg_save_ar_lc);
3521 else
3523 alt_regno = next_scratch_gr_reg ();
3524 alt_reg = gen_rtx_REG (DImode, alt_regno);
3525 do_restore (gen_movdi_x, alt_reg, cfa_off);
3526 cfa_off -= 8;
3528 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3529 emit_move_insn (reg, alt_reg);
3532 /* Restore the return pointer. */
3533 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3535 if (current_frame_info.r[reg_save_b0] != 0)
3537 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3538 reg_emitted (reg_save_b0);
3540 else
3542 alt_regno = next_scratch_gr_reg ();
3543 alt_reg = gen_rtx_REG (DImode, alt_regno);
3544 do_restore (gen_movdi_x, alt_reg, cfa_off);
3545 cfa_off -= 8;
3547 reg = gen_rtx_REG (DImode, BR_REG (0));
3548 emit_move_insn (reg, alt_reg);
3551 /* We should now be at the base of the gr/br/fr spill area. */
3552 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3553 + current_frame_info.spill_size));
3555 /* The GP may be stored on the stack in the prologue, but it's
3556 never restored in the epilogue. Skip the stack slot. */
3557 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3558 cfa_off -= 8;
3560 /* Restore all general registers. */
3561 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3562 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3564 reg = gen_rtx_REG (DImode, regno);
3565 do_restore (gen_gr_restore, reg, cfa_off);
3566 cfa_off -= 8;
3569 /* Restore the branch registers. */
3570 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3571 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3573 alt_regno = next_scratch_gr_reg ();
3574 alt_reg = gen_rtx_REG (DImode, alt_regno);
3575 do_restore (gen_movdi_x, alt_reg, cfa_off);
3576 cfa_off -= 8;
3577 reg = gen_rtx_REG (DImode, regno);
3578 emit_move_insn (reg, alt_reg);
3581 /* Restore floating point registers. */
3582 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3583 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3585 gcc_assert (!(cfa_off & 15));
3586 reg = gen_rtx_REG (XFmode, regno);
3587 do_restore (gen_fr_restore_x, reg, cfa_off);
3588 cfa_off -= 16;
3591 /* Restore ar.unat for real. */
3592 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3594 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3595 emit_move_insn (reg, ar_unat_save_reg);
3598 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3600 finish_spill_pointers ();
3602 if (current_frame_info.total_size
3603 || cfun->machine->ia64_eh_epilogue_sp
3604 || frame_pointer_needed)
3606 /* ??? At this point we must generate a magic insn that appears to
3607 modify the spill iterators, the stack pointer, and the frame
3608 pointer. This would allow the most scheduling freedom. For now,
3609 just hard stop. */
3610 emit_insn (gen_blockage ());
3613 if (cfun->machine->ia64_eh_epilogue_sp)
3614 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3615 else if (frame_pointer_needed)
3617 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3618 RTX_FRAME_RELATED_P (insn) = 1;
3620 else if (current_frame_info.total_size)
3622 rtx offset, frame_size_rtx;
3624 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3625 if (satisfies_constraint_I (frame_size_rtx))
3626 offset = frame_size_rtx;
3627 else
3629 regno = next_scratch_gr_reg ();
3630 offset = gen_rtx_REG (DImode, regno);
3631 emit_move_insn (offset, frame_size_rtx);
3634 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3635 offset));
3637 RTX_FRAME_RELATED_P (insn) = 1;
3638 if (GET_CODE (offset) != CONST_INT)
3639 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3640 gen_rtx_SET (VOIDmode,
3641 stack_pointer_rtx,
3642 gen_rtx_PLUS (DImode,
3643 stack_pointer_rtx,
3644 frame_size_rtx)));
3647 if (cfun->machine->ia64_eh_epilogue_bsp)
3648 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3650 if (! sibcall_p)
3651 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3652 else
3654 int fp = GR_REG (2);
3655 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
3656 first available call-clobbered register. If there was a frame pointer
3657 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3658 so we have to make sure we're using the string "r2" when emitting
3659 the register name for the assembler. */
3660 if (current_frame_info.r[reg_fp]
3661 && current_frame_info.r[reg_fp] == GR_REG (2))
3662 fp = HARD_FRAME_POINTER_REGNUM;
3664 /* We must emit an alloc to force the input registers to become output
3665 registers. Otherwise, if the callee tries to pass its parameters
3666 through to another call without an intervening alloc, then these
3667 values get lost. */
3668 /* ??? We don't need to preserve all input registers. We only need to
3669 preserve those input registers used as arguments to the sibling call.
3670 It is unclear how to compute that number here. */
3671 if (current_frame_info.n_input_regs != 0)
3673 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3674 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3675 const0_rtx, const0_rtx,
3676 n_inputs, const0_rtx));
3677 RTX_FRAME_RELATED_P (insn) = 1;
3682 /* Return 1 if br.ret can do all the work required to return from a
3683 function. */
3685 int
3686 ia64_direct_return (void)
3688 if (reload_completed && ! frame_pointer_needed)
3690 ia64_compute_frame_size (get_frame_size ());
3692 return (current_frame_info.total_size == 0
3693 && current_frame_info.n_spilled == 0
3694 && current_frame_info.r[reg_save_b0] == 0
3695 && current_frame_info.r[reg_save_pr] == 0
3696 && current_frame_info.r[reg_save_ar_pfs] == 0
3697 && current_frame_info.r[reg_save_ar_unat] == 0
3698 && current_frame_info.r[reg_save_ar_lc] == 0);
3700 return 0;
3703 /* Return the magic cookie that we use to hold the return address
3704 during early compilation. */
3707 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3709 if (count != 0)
3710 return NULL;
3711 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3714 /* Split this value after reload, now that we know where the return
3715 address is saved. */
3717 void
3718 ia64_split_return_addr_rtx (rtx dest)
3720 rtx src;
3722 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3724 if (current_frame_info.r[reg_save_b0] != 0)
3726 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3727 reg_emitted (reg_save_b0);
3729 else
3731 HOST_WIDE_INT off;
3732 unsigned int regno;
3733 rtx off_r;
3735 /* Compute offset from CFA for BR0. */
3736 /* ??? Must be kept in sync with ia64_expand_prologue. */
3737 off = (current_frame_info.spill_cfa_off
3738 + current_frame_info.spill_size);
3739 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3740 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3741 off -= 8;
3743 /* Convert CFA offset to a register based offset. */
3744 if (frame_pointer_needed)
3745 src = hard_frame_pointer_rtx;
3746 else
3748 src = stack_pointer_rtx;
3749 off += current_frame_info.total_size;
3752 /* Load address into scratch register. */
3753 off_r = GEN_INT (off);
3754 if (satisfies_constraint_I (off_r))
3755 emit_insn (gen_adddi3 (dest, src, off_r));
3756 else
3758 emit_move_insn (dest, off_r);
3759 emit_insn (gen_adddi3 (dest, src, dest));
3762 src = gen_rtx_MEM (Pmode, dest);
3765 else
3766 src = gen_rtx_REG (DImode, BR_REG (0));
3768 emit_move_insn (dest, src);
3772 ia64_hard_regno_rename_ok (int from, int to)
3774 /* Don't clobber any of the registers we reserved for the prologue. */
3775 unsigned int r;
3777 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3778 if (to == current_frame_info.r[r]
3779 || from == current_frame_info.r[r]
3780 || to == emitted_frame_related_regs[r]
3781 || from == emitted_frame_related_regs[r])
3782 return 0;
3784 /* Don't use output registers outside the register frame. */
3785 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3786 return 0;
3788 /* Retain even/oddness on predicate register pairs. */
3789 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3790 return (from & 1) == (to & 1);
3792 return 1;
3795 /* Target hook for assembling integer objects. Handle word-sized
3796 aligned objects and detect the cases when @fptr is needed. */
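/* For example, an aligned 64-bit pointer to function "foo" in PIC code is
   emitted as "data8 @fptr(foo)", which makes the linker materialize a
   function descriptor for foo instead of storing its raw code address.  */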
3798 static bool
3799 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3801 if (size == POINTER_SIZE / BITS_PER_UNIT
3802 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3803 && GET_CODE (x) == SYMBOL_REF
3804 && SYMBOL_REF_FUNCTION_P (x))
3806 static const char * const directive[2][2] = {
3807 /* 64-bit pointer */ /* 32-bit pointer */
3808 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3809 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3811 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3812 output_addr_const (asm_out_file, x);
3813 fputs (")\n", asm_out_file);
3814 return true;
3816 return default_assemble_integer (x, size, aligned_p);
3819 /* Emit the function prologue. */
3821 static void
3822 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3824 int mask, grsave, grsave_prev;
3826 if (current_frame_info.need_regstk)
3827 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3828 current_frame_info.n_input_regs,
3829 current_frame_info.n_local_regs,
3830 current_frame_info.n_output_regs,
3831 current_frame_info.n_rotate_regs);
3833 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3834 return;
3836 /* Emit the .prologue directive. */
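/* In the two-operand form emitted below, the mask appears to encode which
   of rp (0x8), ar.pfs (0x4), psp (0x2) and pr (0x1) are saved in
   consecutive general registers starting at GRSAVE; see the unwind
   directive documentation for the authoritative encoding.  */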
3838 mask = 0;
3839 grsave = grsave_prev = 0;
3840 if (current_frame_info.r[reg_save_b0] != 0)
3842 mask |= 8;
3843 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3845 if (current_frame_info.r[reg_save_ar_pfs] != 0
3846 && (grsave_prev == 0
3847 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3849 mask |= 4;
3850 if (grsave_prev == 0)
3851 grsave = current_frame_info.r[reg_save_ar_pfs];
3852 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3854 if (current_frame_info.r[reg_fp] != 0
3855 && (grsave_prev == 0
3856 || current_frame_info.r[reg_fp] == grsave_prev + 1))
3858 mask |= 2;
3859 if (grsave_prev == 0)
3860 grsave = HARD_FRAME_POINTER_REGNUM;
3861 grsave_prev = current_frame_info.r[reg_fp];
3863 if (current_frame_info.r[reg_save_pr] != 0
3864 && (grsave_prev == 0
3865 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3867 mask |= 1;
3868 if (grsave_prev == 0)
3869 grsave = current_frame_info.r[reg_save_pr];
3872 if (mask && TARGET_GNU_AS)
3873 fprintf (file, "\t.prologue %d, %d\n", mask,
3874 ia64_dbx_register_number (grsave));
3875 else
3876 fputs ("\t.prologue\n", file);
3878 /* Emit a .spill directive, if necessary, to relocate the base of
3879 the register spill area. */
3880 if (current_frame_info.spill_cfa_off != -16)
3881 fprintf (file, "\t.spill %ld\n",
3882 (long) (current_frame_info.spill_cfa_off
3883 + current_frame_info.spill_size));
3886 /* Emit the .body directive at the scheduled end of the prologue. */
3888 static void
3889 ia64_output_function_end_prologue (FILE *file)
3891 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3892 return;
3894 fputs ("\t.body\n", file);
3897 /* Emit the function epilogue. */
3899 static void
3900 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3901 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3903 int i;
3905 if (current_frame_info.r[reg_fp])
3907 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3908 reg_names[HARD_FRAME_POINTER_REGNUM]
3909 = reg_names[current_frame_info.r[reg_fp]];
3910 reg_names[current_frame_info.r[reg_fp]] = tmp;
3911 reg_emitted (reg_fp);
3913 if (! TARGET_REG_NAMES)
3915 for (i = 0; i < current_frame_info.n_input_regs; i++)
3916 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3917 for (i = 0; i < current_frame_info.n_local_regs; i++)
3918 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3919 for (i = 0; i < current_frame_info.n_output_regs; i++)
3920 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3923 current_frame_info.initialized = 0;
3927 ia64_dbx_register_number (int regno)
3929 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3930 from its home at loc79 to something inside the register frame. We
3931 must perform the same renumbering here for the debug info. */
3932 if (current_frame_info.r[reg_fp])
3934 if (regno == HARD_FRAME_POINTER_REGNUM)
3935 regno = current_frame_info.r[reg_fp];
3936 else if (regno == current_frame_info.r[reg_fp])
3937 regno = HARD_FRAME_POINTER_REGNUM;
3940 if (IN_REGNO_P (regno))
3941 return 32 + regno - IN_REG (0);
3942 else if (LOC_REGNO_P (regno))
3943 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3944 else if (OUT_REGNO_P (regno))
3945 return (32 + current_frame_info.n_input_regs
3946 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3947 else
3948 return regno;
3951 /* Implement TARGET_TRAMPOLINE_INIT.
3953 The trampoline should set the static chain pointer to value placed
3954 into the trampoline and should branch to the specified routine.
3955 To make the normal indirect-subroutine calling convention work,
3956 the trampoline must look like a function descriptor; the first
3957 word being the target address and the second being the target's
3958 global pointer.
3960 We abuse the concept of a global pointer by arranging for it
3961 to point to the data we need to load. The complete trampoline
3962 has the following form:
3964 +-------------------+ \
3965 TRAMP: | __ia64_trampoline | |
3966 +-------------------+ > fake function descriptor
3967 | TRAMP+16 | |
3968 +-------------------+ /
3969 | target descriptor |
3970 +-------------------+
3971 | static link |
3972 +-------------------+
3975 static void
3976 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
3978 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3979 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
3981 /* The Intel assembler requires that the global __ia64_trampoline symbol
3982 be declared explicitly. */
3983 if (!TARGET_GNU_AS)
3985 static bool declared_ia64_trampoline = false;
3987 if (!declared_ia64_trampoline)
3989 declared_ia64_trampoline = true;
3990 (*targetm.asm_out.globalize_label) (asm_out_file,
3991 "__ia64_trampoline");
3995 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3996 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
3997 fnaddr = convert_memory_address (Pmode, fnaddr);
3998 static_chain = convert_memory_address (Pmode, static_chain);
4000 /* Load up our iterator. */
4001 addr_reg = copy_to_reg (addr);
4002 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4004 /* The first two words are the fake descriptor:
4005 __ia64_trampoline, ADDR+16. */
4006 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4007 if (TARGET_ABI_OPEN_VMS)
4009 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4010 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4011 relocation against function symbols to make it identical to the
4012 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4013 strict ELF and dereference to get the bare code address. */
4014 rtx reg = gen_reg_rtx (Pmode);
4015 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4016 emit_move_insn (reg, tramp);
4017 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4018 tramp = reg;
4020 emit_move_insn (m_tramp, tramp);
4021 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4022 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4024 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4025 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4026 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4028 /* The third word is the target descriptor. */
4029 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4030 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4031 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4033 /* The fourth word is the static chain. */
4034 emit_move_insn (m_tramp, static_chain);
4037 /* Do any needed setup for a variadic function. CUM has not been updated
4038 for the last named argument which has type TYPE and mode MODE.
4040 We generate the actual spill instructions during prologue generation. */
4042 static void
4043 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4044 tree type, int * pretend_size,
4045 int second_time ATTRIBUTE_UNUSED)
4047 CUMULATIVE_ARGS next_cum = *cum;
4049 /* Skip the current argument. */
4050 ia64_function_arg_advance (&next_cum, mode, type, 1);
4052 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4054 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4055 *pretend_size = n * UNITS_PER_WORD;
4056 cfun->machine->n_varargs = n;
4060 /* Check whether TYPE is a homogeneous floating point aggregate. If
4061 it is, return the mode of the floating point type that appears
4062 in all leaves. If it is not, return VOIDmode.
4064 An aggregate is a homogeneous floating point aggregate if all
4065 fields/elements in it have the same floating point type (e.g.,
4066 SFmode). 128-bit quad-precision floats are excluded.
4068 Variable sized aggregates should never arrive here, since we should
4069 have already decided to pass them by reference. Top-level zero-sized
4070 aggregates are excluded because our parallels crash the middle-end. */
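/* For example, struct { float x, y, z; } is an HFA whose element mode is
   SFmode; struct { float x; double y; } mixes element types and is not;
   and an aggregate whose only members are 128-bit quad-precision floats
   is excluded as noted above.  */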
4072 static enum machine_mode
4073 hfa_element_mode (const_tree type, bool nested)
4075 enum machine_mode element_mode = VOIDmode;
4076 enum machine_mode mode;
4077 enum tree_code code = TREE_CODE (type);
4078 int know_element_mode = 0;
4079 tree t;
4081 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4082 return VOIDmode;
4084 switch (code)
4086 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4087 case BOOLEAN_TYPE: case POINTER_TYPE:
4088 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4089 case LANG_TYPE: case FUNCTION_TYPE:
4090 return VOIDmode;
4092 /* Fortran complex types are supposed to be HFAs, so we need to handle
4093 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4094 types though. */
4095 case COMPLEX_TYPE:
4096 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4097 && TYPE_MODE (type) != TCmode)
4098 return GET_MODE_INNER (TYPE_MODE (type));
4099 else
4100 return VOIDmode;
4102 case REAL_TYPE:
4103 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4104 mode if this is contained within an aggregate. */
4105 if (nested && TYPE_MODE (type) != TFmode)
4106 return TYPE_MODE (type);
4107 else
4108 return VOIDmode;
4110 case ARRAY_TYPE:
4111 return hfa_element_mode (TREE_TYPE (type), 1);
4113 case RECORD_TYPE:
4114 case UNION_TYPE:
4115 case QUAL_UNION_TYPE:
4116 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4118 if (TREE_CODE (t) != FIELD_DECL)
4119 continue;
4121 mode = hfa_element_mode (TREE_TYPE (t), 1);
4122 if (know_element_mode)
4124 if (mode != element_mode)
4125 return VOIDmode;
4127 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4128 return VOIDmode;
4129 else
4131 know_element_mode = 1;
4132 element_mode = mode;
4135 return element_mode;
4137 default:
4138 /* If we reach here, we probably have some front-end specific type
4139 that the backend doesn't know about. This can happen via the
4140 aggregate_value_p call in init_function_start. All we can do is
4141 ignore unknown tree types. */
4142 return VOIDmode;
4145 return VOIDmode;
4148 /* Return the number of words required to hold a quantity of TYPE and MODE
4149 when passed as an argument. */
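/* For example, with 8-byte words a 12-byte BLKmode aggregate occupies two
   argument words, while a DImode scalar occupies one.  */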
4150 static int
4151 ia64_function_arg_words (tree type, enum machine_mode mode)
4153 int words;
4155 if (mode == BLKmode)
4156 words = int_size_in_bytes (type);
4157 else
4158 words = GET_MODE_SIZE (mode);
4160 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4163 /* Return the number of registers that should be skipped so the current
4164 argument (described by TYPE and WORDS) will be properly aligned.
4166 Integer and float arguments larger than 8 bytes start at the next
4167 even boundary. Aggregates larger than 8 bytes start at the next
4168 even boundary if the aggregate has 16 byte alignment. Note that
4169 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4170 but are still to be aligned in registers.
4172 ??? The ABI does not specify how to handle aggregates with
4173 alignment from 9 to 15 bytes, or greater than 16. We handle them
4174 all as if they had 16 byte alignment. Such aggregates can occur
4175 only if gcc extensions are used. */
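/* For example, if the next free slot is odd (cum->words is odd) and the
   argument is a 16-byte-aligned aggregate, one slot is skipped so that it
   starts on an even slot; on VMS no slots are ever skipped.  */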
4176 static int
4177 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4179 /* No registers are skipped on VMS. */
4180 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4181 return 0;
4183 if (type
4184 && TREE_CODE (type) != INTEGER_TYPE
4185 && TREE_CODE (type) != REAL_TYPE)
4186 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4187 else
4188 return words > 1;
4191 /* Return rtx for register where argument is passed, or zero if it is passed
4192 on the stack. */
4193 /* ??? 128-bit quad-precision floats are always passed in general
4194 registers. */
4197 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4198 int named, int incoming)
4200 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4201 int words = ia64_function_arg_words (type, mode);
4202 int offset = ia64_function_arg_offset (cum, type, words);
4203 enum machine_mode hfa_mode = VOIDmode;
4205 /* For OpenVMS, emit the instruction setting up the argument register here,
4206 where we know it will be emitted together with the other argument-setup
4207 insns. This is not the conceptually best place to do this, but it is
4208 the easiest, as we have convenient access to the cumulative args info. */
4210 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4211 && named == 1)
4213 unsigned HOST_WIDE_INT regval = cum->words;
4214 int i;
4216 for (i = 0; i < 8; i++)
4217 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4219 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4220 GEN_INT (regval));
4223 /* If all argument slots are used, then it must go on the stack. */
4224 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4225 return 0;
4227 /* Check for and handle homogeneous FP aggregates. */
4228 if (type)
4229 hfa_mode = hfa_element_mode (type, 0);
4231 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4232 and unprototyped hfas are passed specially. */
4233 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4235 rtx loc[16];
4236 int i = 0;
4237 int fp_regs = cum->fp_regs;
4238 int int_regs = cum->words + offset;
4239 int hfa_size = GET_MODE_SIZE (hfa_mode);
4240 int byte_size;
4241 int args_byte_size;
4243 /* If prototyped, pass it in FR regs then GR regs.
4244 If not prototyped, pass it in both FR and GR regs.
4246 If this is an SFmode aggregate, then it is possible to run out of
4247 FR regs while GR regs are still left. In that case, we pass the
4248 remaining part in the GR regs. */
4250 /* Fill the FP regs. We do this always. We stop if we reach the end
4251 of the argument, the last FP register, or the last argument slot. */
4253 byte_size = ((mode == BLKmode)
4254 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4255 args_byte_size = int_regs * UNITS_PER_WORD;
4256 offset = 0;
4257 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4258 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4260 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4261 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4262 + fp_regs)),
4263 GEN_INT (offset));
4264 offset += hfa_size;
4265 args_byte_size += hfa_size;
4266 fp_regs++;
4269 /* If no prototype, then the whole thing must go in GR regs. */
4270 if (! cum->prototype)
4271 offset = 0;
4272 /* If this is an SFmode aggregate, then we might have some left over
4273 that needs to go in GR regs. */
4274 else if (byte_size != offset)
4275 int_regs += offset / UNITS_PER_WORD;
4277 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4279 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4281 enum machine_mode gr_mode = DImode;
4282 unsigned int gr_size;
4284 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4285 then this goes in a GR reg, left-adjusted on little-endian and
4286 right-adjusted on big-endian. */
4287 /* ??? Currently this is handled wrong, because 4-byte hunks are
4288 always right adjusted/little endian. */
4289 if (offset & 0x4)
4290 gr_mode = SImode;
4291 /* If we have an even 4 byte hunk because the aggregate is a
4292 multiple of 4 bytes in size, then this goes in a GR reg right
4293 adjusted/little endian. */
4294 else if (byte_size - offset == 4)
4295 gr_mode = SImode;
4297 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4298 gen_rtx_REG (gr_mode, (basereg
4299 + int_regs)),
4300 GEN_INT (offset));
4302 gr_size = GET_MODE_SIZE (gr_mode);
4303 offset += gr_size;
4304 if (gr_size == UNITS_PER_WORD
4305 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4306 int_regs++;
4307 else if (gr_size > UNITS_PER_WORD)
4308 int_regs += gr_size / UNITS_PER_WORD;
4310 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4313 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4314 else if (TARGET_ABI_OPEN_VMS && named == 0)
4316 if (FLOAT_MODE_P (mode))
4317 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4318 else
4319 return gen_rtx_REG (mode, basereg + cum->words);
4322 /* Integral and aggregates go in general registers. If we have run out of
4323 FR registers, then FP values must also go in general registers. This can
4324 happen when we have a SFmode HFA. */
4325 else if (mode == TFmode || mode == TCmode
4326 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4328 int byte_size = ((mode == BLKmode)
4329 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4330 if (BYTES_BIG_ENDIAN
4331 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4332 && byte_size < UNITS_PER_WORD
4333 && byte_size > 0)
4335 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4336 gen_rtx_REG (DImode,
4337 (basereg + cum->words
4338 + offset)),
4339 const0_rtx);
4340 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4342 else
4343 return gen_rtx_REG (mode, basereg + cum->words + offset);
4347 /* If there is a prototype, then FP values go in a FR register when
4348 named, and in a GR register when unnamed. */
4349 else if (cum->prototype)
4351 if (named)
4352 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4353 /* In big-endian mode, an anonymous SFmode value must be represented
4354 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4355 the value into the high half of the general register. */
4356 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4357 return gen_rtx_PARALLEL (mode,
4358 gen_rtvec (1,
4359 gen_rtx_EXPR_LIST (VOIDmode,
4360 gen_rtx_REG (DImode, basereg + cum->words + offset),
4361 const0_rtx)));
4362 else
4363 return gen_rtx_REG (mode, basereg + cum->words + offset);
4365 /* If there is no prototype, then FP values go in both FR and GR
4366 registers. */
4367 else
4369 /* See comment above. */
4370 enum machine_mode inner_mode =
4371 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4373 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4374 gen_rtx_REG (mode, (FR_ARG_FIRST
4375 + cum->fp_regs)),
4376 const0_rtx);
4377 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4378 gen_rtx_REG (inner_mode,
4379 (basereg + cum->words
4380 + offset)),
4381 const0_rtx);
4383 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4387 /* Return the number of bytes, at the beginning of the argument, that must be
4388 put in registers. 0 if the argument is entirely in registers or entirely
4389 in memory. */
4391 static int
4392 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4393 tree type, bool named ATTRIBUTE_UNUSED)
4395 int words = ia64_function_arg_words (type, mode);
4396 int offset = ia64_function_arg_offset (cum, type, words);
4398 /* If all argument slots are used, then it must go on the stack. */
4399 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4400 return 0;
4402 /* It doesn't matter whether the argument goes in FR or GR regs. If
4403 it fits within the 8 argument slots, then it goes entirely in
4404 registers. If it extends past the last argument slot, then the rest
4405 goes on the stack. */
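/* For example, with cum->words == 6, no alignment padding, and a
   three-word argument, two words (16 bytes) land in the last two argument
   registers and the final word goes on the stack, so we return 16.  */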
4407 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4408 return 0;
4410 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4413 /* Return ivms_arg_type based on machine_mode. */
4415 static enum ivms_arg_type
4416 ia64_arg_type (enum machine_mode mode)
4418 switch (mode)
4420 case SFmode:
4421 return FS;
4422 case DFmode:
4423 return FT;
4424 default:
4425 return I64;
4429 /* Update CUM to point after this argument. This is patterned after
4430 ia64_function_arg. */
4432 void
4433 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4434 tree type, int named)
4436 int words = ia64_function_arg_words (type, mode);
4437 int offset = ia64_function_arg_offset (cum, type, words);
4438 enum machine_mode hfa_mode = VOIDmode;
4440 /* If all arg slots are already full, then there is nothing to do. */
4441 if (cum->words >= MAX_ARGUMENT_SLOTS)
4443 cum->words += words + offset;
4444 return;
4447 cum->atypes[cum->words] = ia64_arg_type (mode);
4448 cum->words += words + offset;
4450 /* Check for and handle homogeneous FP aggregates. */
4451 if (type)
4452 hfa_mode = hfa_element_mode (type, 0);
4454 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4455 and unprototyped hfas are passed specially. */
4456 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4458 int fp_regs = cum->fp_regs;
4459 /* This is the original value of cum->words + offset. */
4460 int int_regs = cum->words - words;
4461 int hfa_size = GET_MODE_SIZE (hfa_mode);
4462 int byte_size;
4463 int args_byte_size;
4465 /* If prototyped, pass it in FR regs then GR regs.
4466 If not prototyped, pass it in both FR and GR regs.
4468 If this is an SFmode aggregate, then it is possible to run out of
4469 FR regs while GR regs are still left. In that case, we pass the
4470 remaining part in the GR regs. */
4472 /* Fill the FP regs. We do this always. We stop if we reach the end
4473 of the argument, the last FP register, or the last argument slot. */
4475 byte_size = ((mode == BLKmode)
4476 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4477 args_byte_size = int_regs * UNITS_PER_WORD;
4478 offset = 0;
4479 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4480 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4482 offset += hfa_size;
4483 args_byte_size += hfa_size;
4484 fp_regs++;
4487 cum->fp_regs = fp_regs;
4490 /* On OpenVMS a variable argument is passed either in Rn or Fn. */
4491 else if (TARGET_ABI_OPEN_VMS && named == 0)
4493 cum->int_regs = cum->words;
4494 cum->fp_regs = cum->words;
4497 /* Integral and aggregates go in general registers. So do TFmode FP values.
4498 If we have run out of FR registers, then other FP values must also go in
4499 general registers. This can happen when we have a SFmode HFA. */
4500 else if (mode == TFmode || mode == TCmode
4501 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4502 cum->int_regs = cum->words;
4504 /* If there is a prototype, then FP values go in a FR register when
4505 named, and in a GR register when unnamed. */
4506 else if (cum->prototype)
4508 if (! named)
4509 cum->int_regs = cum->words;
4510 else
4511 /* ??? Complex types should not reach here. */
4512 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4514 /* If there is no prototype, then FP values go in both FR and GR
4515 registers. */
4516 else
4518 /* ??? Complex types should not reach here. */
4519 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4520 cum->int_regs = cum->words;
4524 /* Arguments with alignment larger than 8 bytes start at the next even
4525 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
4526 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4529 ia64_function_arg_boundary (enum machine_mode mode, tree type)
4532 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4533 return PARM_BOUNDARY * 2;
4535 if (type)
4537 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4538 return PARM_BOUNDARY * 2;
4539 else
4540 return PARM_BOUNDARY;
4543 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4544 return PARM_BOUNDARY * 2;
4545 else
4546 return PARM_BOUNDARY;
4549 /* True if it is OK to do sibling call optimization for the specified
4550 call expression EXP. DECL will be the called function, or NULL if
4551 this is an indirect call. */
4552 static bool
4553 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4555 /* We can't perform a sibcall if the current function has the syscall_linkage
4556 attribute. */
4557 if (lookup_attribute ("syscall_linkage",
4558 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4559 return false;
4561 /* We must always return with our current GP. This means we can
4562 only sibcall to functions defined in the current module unless
4563 TARGET_CONST_GP is set to true. */
4564 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4568 /* Implement va_arg. */
4570 static tree
4571 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4572 gimple_seq *post_p)
4574 /* Variable sized types are passed by reference. */
4575 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4577 tree ptrtype = build_pointer_type (type);
4578 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4579 return build_va_arg_indirect_ref (addr);
4582 /* Aggregate arguments with alignment larger than 8 bytes start at
4583 the next even boundary. Integer and floating point arguments
4584 do so if they are larger than 8 bytes, whether or not they are
4585 also aligned larger than 8 bytes. */
4586 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4587 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4589 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4590 size_int (2 * UNITS_PER_WORD - 1));
4591 t = fold_convert (sizetype, t);
4592 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4593 size_int (-2 * UNITS_PER_WORD));
4594 t = fold_convert (TREE_TYPE (valist), t);
4595 gimplify_assign (unshare_expr (valist), t, pre_p);
4598 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4601 /* Return 1 if the function return value is returned in memory. Return 0 if
4602 it is in a register. */
4604 static bool
4605 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4607 enum machine_mode mode;
4608 enum machine_mode hfa_mode;
4609 HOST_WIDE_INT byte_size;
4611 mode = TYPE_MODE (valtype);
4612 byte_size = GET_MODE_SIZE (mode);
4613 if (mode == BLKmode)
4615 byte_size = int_size_in_bytes (valtype);
4616 if (byte_size < 0)
4617 return true;
4620 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4622 hfa_mode = hfa_element_mode (valtype, 0);
4623 if (hfa_mode != VOIDmode)
4625 int hfa_size = GET_MODE_SIZE (hfa_mode);
4627 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4628 return true;
4629 else
4630 return false;
4632 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4633 return true;
4634 else
4635 return false;
4638 /* Return rtx for register that holds the function return value. */
4641 ia64_function_value (const_tree valtype, const_tree func)
4643 enum machine_mode mode;
4644 enum machine_mode hfa_mode;
4645 int unsignedp;
4647 mode = TYPE_MODE (valtype);
4648 hfa_mode = hfa_element_mode (valtype, 0);
4650 if (hfa_mode != VOIDmode)
4652 rtx loc[8];
4653 int i;
4654 int hfa_size;
4655 int byte_size;
4656 int offset;
4658 hfa_size = GET_MODE_SIZE (hfa_mode);
4659 byte_size = ((mode == BLKmode)
4660 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4661 offset = 0;
4662 for (i = 0; offset < byte_size; i++)
4664 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4665 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4666 GEN_INT (offset));
4667 offset += hfa_size;
4669 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4671 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4672 return gen_rtx_REG (mode, FR_ARG_FIRST);
4673 else
4675 bool need_parallel = false;
4677 /* In big-endian mode, we need to manage the layout of aggregates
4678 in the registers so that we get the bits properly aligned in
4679 the highpart of the registers. */
4680 if (BYTES_BIG_ENDIAN
4681 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4682 need_parallel = true;
4684 /* Something like struct S { long double x; char a[0] } is not an
4685 HFA structure, and therefore doesn't go in fp registers. But
4686 the middle-end will give it XFmode anyway, and XFmode values
4687 don't normally fit in integer registers. So we need to smuggle
4688 the value inside a parallel. */
4689 else if (mode == XFmode || mode == XCmode || mode == RFmode)
4690 need_parallel = true;
4692 if (need_parallel)
4694 rtx loc[8];
4695 int offset;
4696 int bytesize;
4697 int i;
4699 offset = 0;
4700 bytesize = int_size_in_bytes (valtype);
4701 /* An empty PARALLEL is invalid here, but the return value
4702 doesn't matter for empty structs. */
4703 if (bytesize == 0)
4704 return gen_rtx_REG (mode, GR_RET_FIRST);
4705 for (i = 0; offset < bytesize; i++)
4707 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4708 gen_rtx_REG (DImode,
4709 GR_RET_FIRST + i),
4710 GEN_INT (offset));
4711 offset += UNITS_PER_WORD;
4713 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4716 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4717 func ? TREE_TYPE (func) : NULL_TREE,
4718 true);
4720 return gen_rtx_REG (mode, GR_RET_FIRST);
4724 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4725 We need to emit DTP-relative relocations. */
4727 static void
4728 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4730 gcc_assert (size == 4 || size == 8);
4731 if (size == 4)
4732 fputs ("\tdata4.ua\t@dtprel(", file);
4733 else
4734 fputs ("\tdata8.ua\t@dtprel(", file);
4735 output_addr_const (file, x);
4736 fputs (")", file);
4739 /* Print a memory address as an operand to reference that memory location. */
4741 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4742 also call this from ia64_print_operand for memory addresses. */
4744 void
4745 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4746 rtx address ATTRIBUTE_UNUSED)
4750 /* Print an operand to an assembler instruction.
4751 C Swap and print a comparison operator.
4752 D Print an FP comparison operator.
4753 E Print 32 - constant, for SImode shifts as extract.
4754 e Print 64 - constant, for DImode rotates.
4755 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4756 a floating point register emitted normally.
4757 G A floating point constant.
4758 I Invert a predicate register by adding 1.
4759 J Select the proper predicate register for a condition.
4760 j Select the inverse predicate register for a condition.
4761 O Append .acq for volatile load.
4762 P Postincrement of a MEM.
4763 Q Append .rel for volatile store.
4764 R Print .s .d or nothing for a single, double or no truncation.
4765 S Shift amount for shladd instruction.
4766 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4767 for Intel assembler.
4768 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4769 for Intel assembler.
4770 X A pair of floating point registers.
4771 r Print register name, or constant 0 as r0. HP compatibility for
4772 Linux kernel.
4773 v Print vector constant value as an 8-byte integer value. */
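/* For example, %C applied to (lt (reg) (reg)) prints "gt", the swapped
   comparison, and %D applied to an UNLT comparison prints "nge", per the
   table above.  */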
4775 void
4776 ia64_print_operand (FILE * file, rtx x, int code)
4778 const char *str;
4780 switch (code)
4782 case 0:
4783 /* Handled below. */
4784 break;
4786 case 'C':
4788 enum rtx_code c = swap_condition (GET_CODE (x));
4789 fputs (GET_RTX_NAME (c), file);
4790 return;
4793 case 'D':
4794 switch (GET_CODE (x))
4796 case NE:
4797 str = "neq";
4798 break;
4799 case UNORDERED:
4800 str = "unord";
4801 break;
4802 case ORDERED:
4803 str = "ord";
4804 break;
4805 case UNLT:
4806 str = "nge";
4807 break;
4808 case UNLE:
4809 str = "ngt";
4810 break;
4811 case UNGT:
4812 str = "nle";
4813 break;
4814 case UNGE:
4815 str = "nlt";
4816 break;
4817 default:
4818 str = GET_RTX_NAME (GET_CODE (x));
4819 break;
4821 fputs (str, file);
4822 return;
4824 case 'E':
4825 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4826 return;
4828 case 'e':
4829 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4830 return;
4832 case 'F':
4833 if (x == CONST0_RTX (GET_MODE (x)))
4834 str = reg_names [FR_REG (0)];
4835 else if (x == CONST1_RTX (GET_MODE (x)))
4836 str = reg_names [FR_REG (1)];
4837 else
4839 gcc_assert (GET_CODE (x) == REG);
4840 str = reg_names [REGNO (x)];
4842 fputs (str, file);
4843 return;
4845 case 'G':
4847 long val[4];
4848 REAL_VALUE_TYPE rv;
4849 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4850 real_to_target (val, &rv, GET_MODE (x));
4851 if (GET_MODE (x) == SFmode)
4852 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4853 else if (GET_MODE (x) == DFmode)
4854 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4855 & 0xffffffff,
4856 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4857 & 0xffffffff);
4858 else
4859 output_operand_lossage ("invalid %%G mode");
4861 return;
4863 case 'I':
4864 fputs (reg_names [REGNO (x) + 1], file);
4865 return;
4867 case 'J':
4868 case 'j':
4870 unsigned int regno = REGNO (XEXP (x, 0));
4871 if (GET_CODE (x) == EQ)
4872 regno += 1;
4873 if (code == 'j')
4874 regno ^= 1;
4875 fputs (reg_names [regno], file);
4877 return;
4879 case 'O':
4880 if (MEM_VOLATILE_P (x))
4881 fputs(".acq", file);
4882 return;
4884 case 'P':
4886 HOST_WIDE_INT value;
4888 switch (GET_CODE (XEXP (x, 0)))
4890 default:
4891 return;
4893 case POST_MODIFY:
4894 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4895 if (GET_CODE (x) == CONST_INT)
4896 value = INTVAL (x);
4897 else
4899 gcc_assert (GET_CODE (x) == REG);
4900 fprintf (file, ", %s", reg_names[REGNO (x)]);
4901 return;
4903 break;
4905 case POST_INC:
4906 value = GET_MODE_SIZE (GET_MODE (x));
4907 break;
4909 case POST_DEC:
4910 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4911 break;
4914 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4915 return;
4918 case 'Q':
4919 if (MEM_VOLATILE_P (x))
4920 fputs(".rel", file);
4921 return;
4923 case 'R':
4924 if (x == CONST0_RTX (GET_MODE (x)))
4925 fputs(".s", file);
4926 else if (x == CONST1_RTX (GET_MODE (x)))
4927 fputs(".d", file);
4928 else if (x == CONST2_RTX (GET_MODE (x)))
4929 ;
4930 else
4931 output_operand_lossage ("invalid %%R value");
4932 return;
4934 case 'S':
4935 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4936 return;
4938 case 'T':
4939 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4941 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4942 return;
4944 break;
4946 case 'U':
4947 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4949 const char *prefix = "0x";
4950 if (INTVAL (x) & 0x80000000)
4952 fprintf (file, "0xffffffff");
4953 prefix = "";
4955 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4956 return;
4958 break;
4960 case 'X':
4962 unsigned int regno = REGNO (x);
4963 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4965 return;
4967 case 'r':
4968 /* If this operand is the constant zero, write it as register zero.
4969 Any register, zero, or CONST_INT value is OK here. */
4970 if (GET_CODE (x) == REG)
4971 fputs (reg_names[REGNO (x)], file);
4972 else if (x == CONST0_RTX (GET_MODE (x)))
4973 fputs ("r0", file);
4974 else if (GET_CODE (x) == CONST_INT)
4975 output_addr_const (file, x);
4976 else
4977 output_operand_lossage ("invalid %%r value");
4978 return;
4980 case 'v':
4981 gcc_assert (GET_CODE (x) == CONST_VECTOR);
4982 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4983 break;
4985 case '+':
4987 const char *which;
4989 /* For conditional branches, returns or calls, substitute
4990 sptk, dptk, dpnt, or spnt for %s. */
4991 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4992 if (x)
4994 int pred_val = INTVAL (XEXP (x, 0));
4996 /* Guess top and bottom 2% statically predicted. */
4997 if (pred_val < REG_BR_PROB_BASE / 50
4998 && br_prob_note_reliable_p (x))
4999 which = ".spnt";
5000 else if (pred_val < REG_BR_PROB_BASE / 2)
5001 which = ".dpnt";
5002 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5003 || !br_prob_note_reliable_p (x))
5004 which = ".dptk";
5005 else
5006 which = ".sptk";
5008 else if (GET_CODE (current_output_insn) == CALL_INSN)
5009 which = ".sptk";
5010 else
5011 which = ".dptk";
5013 fputs (which, file);
5014 return;
5017 case ',':
5018 x = current_insn_predicate;
5019 if (x)
5021 unsigned int regno = REGNO (XEXP (x, 0));
5022 if (GET_CODE (x) == EQ)
5023 regno += 1;
5024 fprintf (file, "(%s) ", reg_names [regno]);
5026 return;
5028 default:
5029 output_operand_lossage ("ia64_print_operand: unknown code");
5030 return;
5033 switch (GET_CODE (x))
5035 /* This happens for the spill/restore instructions. */
5036 case POST_INC:
5037 case POST_DEC:
5038 case POST_MODIFY:
5039 x = XEXP (x, 0);
5040 /* ... fall through ... */
5042 case REG:
5043 fputs (reg_names [REGNO (x)], file);
5044 break;
5046 case MEM:
5048 rtx addr = XEXP (x, 0);
5049 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5050 addr = XEXP (addr, 0);
5051 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5052 break;
5055 default:
5056 output_addr_const (file, x);
5057 break;
5060 return;
5063 /* Compute a (partial) cost for rtx X. Return true if the complete
5064 cost has been computed, and false if subexpressions should be
5065 scanned. In either case, *TOTAL contains the cost result. */
5066 /* ??? This is incomplete. */
5068 static bool
5069 ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5070 bool speed ATTRIBUTE_UNUSED)
5072 switch (code)
5074 case CONST_INT:
5075 switch (outer_code)
5077 case SET:
5078 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5079 return true;
5080 case PLUS:
5081 if (satisfies_constraint_I (x))
5082 *total = 0;
5083 else if (satisfies_constraint_J (x))
5084 *total = 1;
5085 else
5086 *total = COSTS_N_INSNS (1);
5087 return true;
5088 default:
5089 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5090 *total = 0;
5091 else
5092 *total = COSTS_N_INSNS (1);
5093 return true;
5096 case CONST_DOUBLE:
5097 *total = COSTS_N_INSNS (1);
5098 return true;
5100 case CONST:
5101 case SYMBOL_REF:
5102 case LABEL_REF:
5103 *total = COSTS_N_INSNS (3);
5104 return true;
5106 case MULT:
5107 /* For multiplies wider than HImode, we have to go to the FPU,
5108 which normally involves copies. Plus there's the latency
5109 of the multiply itself, and the latency of the instructions to
5110 transfer integer regs to FP regs. */
5111 /* ??? Check for FP mode. */
5112 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5113 *total = COSTS_N_INSNS (10);
5114 else
5115 *total = COSTS_N_INSNS (2);
5116 return true;
5118 case PLUS:
5119 case MINUS:
5120 case ASHIFT:
5121 case ASHIFTRT:
5122 case LSHIFTRT:
5123 *total = COSTS_N_INSNS (1);
5124 return true;
5126 case DIV:
5127 case UDIV:
5128 case MOD:
5129 case UMOD:
5130 /* We make divide expensive, so that divide-by-constant will be
5131 optimized to a multiply. */
5132 *total = COSTS_N_INSNS (60);
5133 return true;
5135 default:
5136 return false;
5140 /* Calculate the cost of moving data from a register in class FROM to
5141 one in class TO, using MODE. */
5144 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5145 enum reg_class to)
5147 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5148 if (to == ADDL_REGS)
5149 to = GR_REGS;
5150 if (from == ADDL_REGS)
5151 from = GR_REGS;
5153 /* All costs are symmetric, so reduce cases by putting the
5154 lower number class as the destination. */
5155 if (from < to)
5157 enum reg_class tmp = to;
5158 to = from, from = tmp;
5161 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5162 so that we get secondary memory reloads. Between FR_REGS,
5163 we have to make this at least as expensive as MEMORY_MOVE_COST
5164 to avoid spectacularly poor register class preferencing. */
5165 if (mode == XFmode || mode == RFmode)
5167 if (to != GR_REGS || from != GR_REGS)
5168 return MEMORY_MOVE_COST (mode, to, 0);
5169 else
5170 return 3;
5173 switch (to)
5175 case PR_REGS:
5176 /* Moving between PR registers takes two insns. */
5177 if (from == PR_REGS)
5178 return 3;
5179 /* Moving between PR and anything but GR is impossible. */
5180 if (from != GR_REGS)
5181 return MEMORY_MOVE_COST (mode, to, 0);
5182 break;
5184 case BR_REGS:
5185 /* Moving between BR and anything but GR is impossible. */
5186 if (from != GR_REGS && from != GR_AND_BR_REGS)
5187 return MEMORY_MOVE_COST (mode, to, 0);
5188 break;
5190 case AR_I_REGS:
5191 case AR_M_REGS:
5192 /* Moving between AR and anything but GR is impossible. */
5193 if (from != GR_REGS)
5194 return MEMORY_MOVE_COST (mode, to, 0);
5195 break;
5197 case GR_REGS:
5198 case FR_REGS:
5199 case FP_REGS:
5200 case GR_AND_FR_REGS:
5201 case GR_AND_BR_REGS:
5202 case ALL_REGS:
5203 break;
5205 default:
5206 gcc_unreachable ();
5209 return 2;
5212 /* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
5213 to use when copying X into that class. */
5215 enum reg_class
5216 ia64_preferred_reload_class (rtx x, enum reg_class rclass)
5218 switch (rclass)
5220 case FR_REGS:
5221 case FP_REGS:
5222 /* Don't allow volatile mem reloads into floating point registers.
5223 This is defined to force reload to choose the r/m case instead
5224 of the f/f case when reloading (set (reg fX) (mem/v)). */
5225 if (MEM_P (x) && MEM_VOLATILE_P (x))
5226 return NO_REGS;
5228 /* Force all unrecognized constants into the constant pool. */
5229 if (CONSTANT_P (x))
5230 return NO_REGS;
5231 break;
5233 case AR_M_REGS:
5234 case AR_I_REGS:
5235 if (!OBJECT_P (x))
5236 return NO_REGS;
5237 break;
5239 default:
5240 break;
5243 return rclass;
5246 /* This function returns the register class required for a secondary
5247 register when copying between one of the registers in RCLASS, and X,
5248 using MODE. A return value of NO_REGS means that no secondary register
5249 is required. */
5251 enum reg_class
5252 ia64_secondary_reload_class (enum reg_class rclass,
5253 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5255 int regno = -1;
5257 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5258 regno = true_regnum (x);
5260 switch (rclass)
5262 case BR_REGS:
5263 case AR_M_REGS:
5264 case AR_I_REGS:
5265 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5266 interaction. We end up with two pseudos with overlapping lifetimes
5267 both of which are equiv to the same constant, and both which need
5268 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5269 changes depending on the path length, which means the qty_first_reg
5270 check in make_regs_eqv can give different answers at different times.
5271 At some point I'll probably need a reload_indi pattern to handle
5272 this.
5274 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5275 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5276 non-general registers for good measure. */
5277 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5278 return GR_REGS;
5280 /* This is needed if a pseudo used as a call_operand gets spilled to a
5281 stack slot. */
5282 if (GET_CODE (x) == MEM)
5283 return GR_REGS;
5284 break;
5286 case FR_REGS:
5287 case FP_REGS:
5288 /* Need to go through general registers to get to other class regs. */
5289 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5290 return GR_REGS;
5292 /* This can happen when a paradoxical subreg is an operand to the
5293 muldi3 pattern. */
5294 /* ??? This shouldn't be necessary after instruction scheduling is
5295 enabled, because paradoxical subregs are not accepted by
5296 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5297 stop the paradoxical subreg stupidity in the *_operand functions
5298 in recog.c. */
5299 if (GET_CODE (x) == MEM
5300 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5301 || GET_MODE (x) == QImode))
5302 return GR_REGS;
5304 /* This can happen because of the ior/and/etc patterns that accept FP
5305 registers as operands. If the third operand is a constant, then it
5306 needs to be reloaded into a FP register. */
5307 if (GET_CODE (x) == CONST_INT)
5308 return GR_REGS;
5310 /* This can happen because of register elimination in a muldi3 insn.
5311 E.g. `26107 * (unsigned long)&u'. */
5312 if (GET_CODE (x) == PLUS)
5313 return GR_REGS;
5314 break;
5316 case PR_REGS:
5317 /* ??? This happens if we cse/gcse a BImode value across a call,
5318 and the function has a nonlocal goto. This is because global
5319 does not allocate call crossing pseudos to hard registers when
5320 crtl->has_nonlocal_goto is true. This is relatively
5321 common for C++ programs that use exceptions. To reproduce,
5322 return NO_REGS and compile libstdc++. */
5323 if (GET_CODE (x) == MEM)
5324 return GR_REGS;
5326 /* This can happen when we take a BImode subreg of a DImode value,
5327 and that DImode value winds up in some non-GR register. */
5328 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5329 return GR_REGS;
5330 break;
5332 default:
5333 break;
5336 return NO_REGS;
5340 /* Implement targetm.unspec_may_trap_p hook. */
5341 static int
5342 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5344 if (GET_CODE (x) == UNSPEC)
5346 switch (XINT (x, 1))
5348 case UNSPEC_LDA:
5349 case UNSPEC_LDS:
5350 case UNSPEC_LDSA:
5351 case UNSPEC_LDCCLR:
5352 case UNSPEC_CHKACLR:
5353 case UNSPEC_CHKS:
5354 /* These unspecs are just wrappers. */
5355 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5359 return default_unspec_may_trap_p (x, flags);
5363 /* Parse the -mfixed-range= option string. */
5365 static void
5366 fix_range (const char *const_str)
5368 int i, first, last;
5369 char *str, *dash, *comma;
5371 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5372 REG2 are either register names or register numbers. The effect
5373 of this option is to mark the registers in the range from REG1 to
5374 REG2 as ``fixed'' so they won't be used by the compiler. This is
5375 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
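/* For example, -mfixed-range=f32-f127 reserves the upper floating point
   register file, and several ranges can be combined with commas, as in
   -mfixed-range=f12-f15,f32-f127.  */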
5377 i = strlen (const_str);
5378 str = (char *) alloca (i + 1);
5379 memcpy (str, const_str, i + 1);
5381 while (1)
5383 dash = strchr (str, '-');
5384 if (!dash)
5386 warning (0, "value of -mfixed-range must have form REG1-REG2");
5387 return;
5389 *dash = '\0';
5391 comma = strchr (dash + 1, ',');
5392 if (comma)
5393 *comma = '\0';
5395 first = decode_reg_name (str);
5396 if (first < 0)
5398 warning (0, "unknown register name: %s", str);
5399 return;
5402 last = decode_reg_name (dash + 1);
5403 if (last < 0)
5405 warning (0, "unknown register name: %s", dash + 1);
5406 return;
5409 *dash = '-';
5411 if (first > last)
5413 warning (0, "%s-%s is an empty range", str, dash + 1);
5414 return;
5417 for (i = first; i <= last; ++i)
5418 fixed_regs[i] = call_used_regs[i] = 1;
5420 if (!comma)
5421 break;
5423 *comma = ',';
5424 str = comma + 1;
5428 /* Implement TARGET_HANDLE_OPTION. */
5430 static bool
5431 ia64_handle_option (size_t code, const char *arg, int value)
5433 switch (code)
5435 case OPT_mfixed_range_:
5436 fix_range (arg);
5437 return true;
5439 case OPT_mtls_size_:
5440 if (value != 14 && value != 22 && value != 64)
5441 error ("bad value %<%s%> for -mtls-size= switch", arg);
5442 return true;
5444 case OPT_mtune_:
5446 static struct pta
5448 const char *name; /* processor name or nickname. */
5449 enum processor_type processor;
5451 const processor_alias_table[] =
5453 {"itanium2", PROCESSOR_ITANIUM2},
5454 {"mckinley", PROCESSOR_ITANIUM2},
5456 int const pta_size = ARRAY_SIZE (processor_alias_table);
5457 int i;
5459 for (i = 0; i < pta_size; i++)
5460 if (!strcmp (arg, processor_alias_table[i].name))
5462 ia64_tune = processor_alias_table[i].processor;
5463 break;
5465 if (i == pta_size)
5466 error ("bad value %<%s%> for -mtune= switch", arg);
5467 return true;
5470 default:
5471 return true;
5475 /* Implement OVERRIDE_OPTIONS. */
5477 void
5478 ia64_override_options (void)
5480 if (TARGET_AUTO_PIC)
5481 target_flags |= MASK_CONST_GP;
5483 /* Numerous experiments show that IRA-based loop pressure
5484 calculation works better for RTL loop invariant motion on targets
5485 with enough (>= 32) registers. It is an expensive optimization,
5486 so it is enabled only when optimizing for peak performance (-O3 and above). */
5487 if (optimize >= 3)
5488 flag_ira_loop_pressure = 1;
5491 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5493 init_machine_status = ia64_init_machine_status;
5495 if (align_functions <= 0)
5496 align_functions = 64;
5497 if (align_loops <= 0)
5498 align_loops = 32;
5499 if (TARGET_ABI_OPEN_VMS)
5500 flag_no_common = 1;
5502 ia64_override_options_after_change();
5505 /* Implement targetm.override_options_after_change. */
5507 static void
5508 ia64_override_options_after_change (void)
5510 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5511 flag_schedule_insns_after_reload = 0;
5513 if (optimize >= 3
5514 && ! sel_sched_switch_set)
5516 flag_selective_scheduling2 = 1;
5517 flag_sel_sched_pipelining = 1;
5519 if (mflag_sched_control_spec == 2)
5521 /* Control speculation is on by default for the selective scheduler,
5522 but not for the Haifa scheduler. */
5523 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5525 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5527 /* FIXME: remove this once breaking auto-inc insns is implemented
5528 as a transformation. */
5529 flag_auto_inc_dec = 0;
5533 /* Initialize the record of emitted frame related registers. */
5535 void ia64_init_expanders (void)
5537 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5540 static struct machine_function *
5541 ia64_init_machine_status (void)
5543 return GGC_CNEW (struct machine_function);
5546 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5547 static enum attr_type ia64_safe_type (rtx);
5549 static enum attr_itanium_class
5550 ia64_safe_itanium_class (rtx insn)
5552 if (recog_memoized (insn) >= 0)
5553 return get_attr_itanium_class (insn);
5554 else if (DEBUG_INSN_P (insn))
5555 return ITANIUM_CLASS_IGNORE;
5556 else
5557 return ITANIUM_CLASS_UNKNOWN;
5560 static enum attr_type
5561 ia64_safe_type (rtx insn)
5563 if (recog_memoized (insn) >= 0)
5564 return get_attr_type (insn);
5565 else
5566 return TYPE_UNKNOWN;
5569 /* The following collection of routines emit instruction group stop bits as
5570 necessary to avoid dependencies. */
5572 /* Need to track some additional registers as far as serialization is
5573 concerned so we can properly handle br.call and br.ret. We could
5574 make these registers visible to gcc, but since these registers are
5575 never explicitly used in gcc generated code, it seems wasteful to
5576 do so (plus it would make the call and return patterns needlessly
5577 complex). */
5578 #define REG_RP (BR_REG (0))
5579 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
5580 /* This is used for volatile asms which may require a stop bit immediately
5581 before and after them. */
5582 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
5583 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5584 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
5586 /* For each register, we keep track of how it has been written in the
5587 current instruction group.
5589 If a register is written unconditionally (no qualifying predicate),
5590 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5592 If a register is written if its qualifying predicate P is true, we
5593 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5594 may be written again by the complement of P (P^1) and when this happens,
5595 WRITE_COUNT gets set to 2.
5597 The result of this is that whenever an insn attempts to write a register
5598 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5600 If a predicate register is written by a floating-point insn, we set
5601 WRITTEN_BY_FP to true.
5603 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5604 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
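/* For example, if (p6) mov r14 = ... and (p7) mov r14 = ... appear in the
   same group with p7 the complement of p6, WRITE_COUNT for r14 goes from
   1 to 2 and no stop bit is needed between them; any further write of r14
   within the group would then require one.  */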
5606 #if GCC_VERSION >= 4000
5607 #define RWS_FIELD_TYPE __extension__ unsigned short
5608 #else
5609 #define RWS_FIELD_TYPE unsigned int
5610 #endif
5611 struct reg_write_state
5613 RWS_FIELD_TYPE write_count : 2;
5614 RWS_FIELD_TYPE first_pred : 10;
5615 RWS_FIELD_TYPE written_by_fp : 1;
5616 RWS_FIELD_TYPE written_by_and : 1;
5617 RWS_FIELD_TYPE written_by_or : 1;
5620 /* Cumulative info for the current instruction group. */
5621 struct reg_write_state rws_sum[NUM_REGS];
5622 #ifdef ENABLE_CHECKING
5623 /* Bitmap whether a register has been written in the current insn. */
5624 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5625 / HOST_BITS_PER_WIDEST_FAST_INT];
5627 static inline void
5628 rws_insn_set (int regno)
5630 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5631 SET_HARD_REG_BIT (rws_insn, regno);
5634 static inline int
5635 rws_insn_test (int regno)
5637 return TEST_HARD_REG_BIT (rws_insn, regno);
5639 #else
5640 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5641 unsigned char rws_insn[2];
5643 static inline void
5644 rws_insn_set (int regno)
5646 if (regno == REG_AR_CFM)
5647 rws_insn[0] = 1;
5648 else if (regno == REG_VOLATILE)
5649 rws_insn[1] = 1;
5652 static inline int
5653 rws_insn_test (int regno)
5655 if (regno == REG_AR_CFM)
5656 return rws_insn[0];
5657 if (regno == REG_VOLATILE)
5658 return rws_insn[1];
5659 return 0;
5661 #endif
5663 /* Indicates whether this is the first instruction after a stop bit,
5664 in which case we don't need another stop bit. Without this,
5665 ia64_variable_issue will die when scheduling an alloc. */
5666 static int first_instruction;
5668 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5669 RTL for one instruction. */
5670 struct reg_flags
5672 unsigned int is_write : 1; /* Is register being written? */
5673 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5674 unsigned int is_branch : 1; /* Is register used as part of a branch? */
5675 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5676 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
5677 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
5680 static void rws_update (int, struct reg_flags, int);
5681 static int rws_access_regno (int, struct reg_flags, int);
5682 static int rws_access_reg (rtx, struct reg_flags, int);
5683 static void update_set_flags (rtx, struct reg_flags *);
5684 static int set_src_needs_barrier (rtx, struct reg_flags, int);
5685 static int rtx_needs_barrier (rtx, struct reg_flags, int);
5686 static void init_insn_group_barriers (void);
5687 static int group_barrier_needed (rtx);
5688 static int safe_group_barrier_needed (rtx);
5689 static int in_safe_group_barrier;
5691 /* Update *RWS for REGNO, which is being written by the current instruction,
5692 with predicate PRED, and associated register flags in FLAGS. */
5694 static void
5695 rws_update (int regno, struct reg_flags flags, int pred)
5697 if (pred)
5698 rws_sum[regno].write_count++;
5699 else
5700 rws_sum[regno].write_count = 2;
5701 rws_sum[regno].written_by_fp |= flags.is_fp;
5702 /* ??? Not tracking and/or across differing predicates. */
5703 rws_sum[regno].written_by_and = flags.is_and;
5704 rws_sum[regno].written_by_or = flags.is_or;
5705 rws_sum[regno].first_pred = pred;
5708 /* Handle an access to register REGNO of type FLAGS using predicate register
5709 PRED. Update rws_sum array. Return 1 if this access creates
5710 a dependency with an earlier instruction in the same group. */
5712 static int
5713 rws_access_regno (int regno, struct reg_flags flags, int pred)
5715 int need_barrier = 0;
5717 gcc_assert (regno < NUM_REGS);
5719 if (! PR_REGNO_P (regno))
5720 flags.is_and = flags.is_or = 0;
5722 if (flags.is_write)
5724 int write_count;
5726 rws_insn_set (regno);
5727 write_count = rws_sum[regno].write_count;
5729 switch (write_count)
5731 case 0:
5732 /* The register has not been written yet. */
5733 if (!in_safe_group_barrier)
5734 rws_update (regno, flags, pred);
5735 break;
5737 case 1:
5738 /* The register has been written via a predicate. If this is
5739 not a complementary predicate, then we need a barrier. */
5740 /* ??? This assumes that P and P+1 are always complementary
5741 predicates for P even. */
5742 if (flags.is_and && rws_sum[regno].written_by_and)
5744 else if (flags.is_or && rws_sum[regno].written_by_or)
5746 else if ((rws_sum[regno].first_pred ^ 1) != pred)
5747 need_barrier = 1;
5748 if (!in_safe_group_barrier)
5749 rws_update (regno, flags, pred);
5750 break;
5752 case 2:
5753 /* The register has been unconditionally written already. We
5754 need a barrier. */
5755 if (flags.is_and && rws_sum[regno].written_by_and)
5757 else if (flags.is_or && rws_sum[regno].written_by_or)
5759 else
5760 need_barrier = 1;
5761 if (!in_safe_group_barrier)
5763 rws_sum[regno].written_by_and = flags.is_and;
5764 rws_sum[regno].written_by_or = flags.is_or;
5766 break;
5768 default:
5769 gcc_unreachable ();
5772 else
5774 if (flags.is_branch)
5776 	  /* Branches have several RAW exceptions that allow us to avoid
5777 	     barriers.  */
5779 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5780 /* RAW dependencies on branch regs are permissible as long
5781 as the writer is a non-branch instruction. Since we
5782 never generate code that uses a branch register written
5783 by a branch instruction, handling this case is
5784 easy. */
5785 return 0;
5787 if (REGNO_REG_CLASS (regno) == PR_REGS
5788 && ! rws_sum[regno].written_by_fp)
5789 /* The predicates of a branch are available within the
5790 same insn group as long as the predicate was written by
5791 something other than a floating-point instruction. */
5792 return 0;
5795 if (flags.is_and && rws_sum[regno].written_by_and)
5796 return 0;
5797 if (flags.is_or && rws_sum[regno].written_by_or)
5798 return 0;
5800 switch (rws_sum[regno].write_count)
5802 case 0:
5803 /* The register has not been written yet. */
5804 break;
5806 case 1:
5807 /* The register has been written via a predicate. If this is
5808 not a complementary predicate, then we need a barrier. */
5809 /* ??? This assumes that P and P+1 are always complementary
5810 predicates for P even. */
5811 if ((rws_sum[regno].first_pred ^ 1) != pred)
5812 need_barrier = 1;
5813 break;
5815 case 2:
5816 /* The register has been unconditionally written already. We
5817 need a barrier. */
5818 need_barrier = 1;
5819 break;
5821 default:
5822 gcc_unreachable ();
5826 return need_barrier;
5829 static int
5830 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5832 int regno = REGNO (reg);
5833 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5835 if (n == 1)
5836 return rws_access_regno (regno, flags, pred);
5837 else
5839 int need_barrier = 0;
5840 while (--n >= 0)
5841 need_barrier |= rws_access_regno (regno + n, flags, pred);
5842 return need_barrier;
5846 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
5847 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5849 static void
5850 update_set_flags (rtx x, struct reg_flags *pflags)
5852 rtx src = SET_SRC (x);
5854 switch (GET_CODE (src))
5856 case CALL:
5857 return;
5859 case IF_THEN_ELSE:
5860 /* There are four cases here:
5861 (1) The destination is (pc), in which case this is a branch,
5862 nothing here applies.
5863 (2) The destination is ar.lc, in which case this is a
5864 	 doloop_end_internal.
5865 (3) The destination is an fp register, in which case this is
5866 an fselect instruction.
5867 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5868 this is a check load.
5869 In all cases, nothing we do in this function applies. */
5870 return;
5872 default:
5873 if (COMPARISON_P (src)
5874 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5875 /* Set pflags->is_fp to 1 so that we know we're dealing
5876 with a floating point comparison when processing the
5877 destination of the SET. */
5878 pflags->is_fp = 1;
5880 /* Discover if this is a parallel comparison. We only handle
5881 and.orcm and or.andcm at present, since we must retain a
5882 strict inverse on the predicate pair. */
5883 else if (GET_CODE (src) == AND)
5884 pflags->is_and = 1;
5885 else if (GET_CODE (src) == IOR)
5886 pflags->is_or = 1;
5888 break;
5892 /* Subroutine of rtx_needs_barrier; this function determines whether the
5893 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5894 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5895 for this insn. */
5897 static int
5898 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5900 int need_barrier = 0;
5901 rtx dst;
5902 rtx src = SET_SRC (x);
5904 if (GET_CODE (src) == CALL)
5905     /* We don't need to worry about the result registers that
5906        get written by a subroutine call.  */
5907 return rtx_needs_barrier (src, flags, pred);
5908 else if (SET_DEST (x) == pc_rtx)
5910 /* X is a conditional branch. */
5911 /* ??? This seems redundant, as the caller sets this bit for
5912 all JUMP_INSNs. */
5913 if (!ia64_spec_check_src_p (src))
5914 flags.is_branch = 1;
5915 return rtx_needs_barrier (src, flags, pred);
5918 if (ia64_spec_check_src_p (src))
5919     /* Avoid checking one register twice (in the condition
5920        and in the 'then' section) for the ldc pattern.  */
5922 gcc_assert (REG_P (XEXP (src, 2)));
5923 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5925 /* We process MEM below. */
5926 src = XEXP (src, 1);
5929 need_barrier |= rtx_needs_barrier (src, flags, pred);
5931 dst = SET_DEST (x);
5932 if (GET_CODE (dst) == ZERO_EXTRACT)
5934 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5935 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5937 return need_barrier;
5940 /* Handle an access to rtx X of type FLAGS using predicate register
5941 PRED. Return 1 if this access creates a dependency with an earlier
5942 instruction in the same group. */
5944 static int
5945 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5947 int i, j;
5948 int is_complemented = 0;
5949 int need_barrier = 0;
5950 const char *format_ptr;
5951 struct reg_flags new_flags;
5952 rtx cond;
5954 if (! x)
5955 return 0;
5957 new_flags = flags;
5959 switch (GET_CODE (x))
5961 case SET:
5962 update_set_flags (x, &new_flags);
5963 need_barrier = set_src_needs_barrier (x, new_flags, pred);
5964 if (GET_CODE (SET_SRC (x)) != CALL)
5966 new_flags.is_write = 1;
5967 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5969 break;
5971 case CALL:
5972 new_flags.is_write = 0;
5973 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5975 /* Avoid multiple register writes, in case this is a pattern with
5976 multiple CALL rtx. This avoids a failure in rws_access_reg. */
5977 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5979 new_flags.is_write = 1;
5980 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5981 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5982 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5984 break;
5986 case COND_EXEC:
5987 /* X is a predicated instruction. */
5989 cond = COND_EXEC_TEST (x);
5990 gcc_assert (!pred);
5991 need_barrier = rtx_needs_barrier (cond, flags, 0);
5993 if (GET_CODE (cond) == EQ)
5994 is_complemented = 1;
5995 cond = XEXP (cond, 0);
5996 gcc_assert (GET_CODE (cond) == REG
5997 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5998 pred = REGNO (cond);
5999 if (is_complemented)
6000 ++pred;
6002 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6003 return need_barrier;
6005 case CLOBBER:
6006 case USE:
6007       /* Clobber & use are for earlier compiler phases only.  */
6008 break;
6010 case ASM_OPERANDS:
6011 case ASM_INPUT:
6012 /* We always emit stop bits for traditional asms. We emit stop bits
6013 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6014 if (GET_CODE (x) != ASM_OPERANDS
6015 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6017 /* Avoid writing the register multiple times if we have multiple
6018 asm outputs. This avoids a failure in rws_access_reg. */
6019 if (! rws_insn_test (REG_VOLATILE))
6021 new_flags.is_write = 1;
6022 rws_access_regno (REG_VOLATILE, new_flags, pred);
6024 return 1;
6027 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6028 We cannot just fall through here since then we would be confused
6029 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6030 	 a traditional asm, unlike its normal usage.  */
6032 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6033 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6034 need_barrier = 1;
6035 break;
6037 case PARALLEL:
6038 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6040 rtx pat = XVECEXP (x, 0, i);
6041 switch (GET_CODE (pat))
6043 case SET:
6044 update_set_flags (pat, &new_flags);
6045 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6046 break;
6048 case USE:
6049 case CALL:
6050 case ASM_OPERANDS:
6051 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6052 break;
6054 case CLOBBER:
6055 case RETURN:
6056 break;
6058 default:
6059 gcc_unreachable ();
6062 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6064 rtx pat = XVECEXP (x, 0, i);
6065 if (GET_CODE (pat) == SET)
6067 if (GET_CODE (SET_SRC (pat)) != CALL)
6069 new_flags.is_write = 1;
6070 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6071 pred);
6074 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6075 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6077 break;
6079 case SUBREG:
6080 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6081 break;
6082 case REG:
6083 if (REGNO (x) == AR_UNAT_REGNUM)
6085 for (i = 0; i < 64; ++i)
6086 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6088 else
6089 need_barrier = rws_access_reg (x, flags, pred);
6090 break;
6092 case MEM:
6093 /* Find the regs used in memory address computation. */
6094 new_flags.is_write = 0;
6095 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6096 break;
6098 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6099 case SYMBOL_REF: case LABEL_REF: case CONST:
6100 break;
6102 /* Operators with side-effects. */
6103 case POST_INC: case POST_DEC:
6104 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6106 new_flags.is_write = 0;
6107 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6108 new_flags.is_write = 1;
6109 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6110 break;
6112 case POST_MODIFY:
6113 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6115 new_flags.is_write = 0;
6116 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6117 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6118 new_flags.is_write = 1;
6119 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6120 break;
6122 /* Handle common unary and binary ops for efficiency. */
6123 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6124 case MOD: case UDIV: case UMOD: case AND: case IOR:
6125 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6126 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6127 case NE: case EQ: case GE: case GT: case LE:
6128 case LT: case GEU: case GTU: case LEU: case LTU:
6129 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6130 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6131 break;
6133 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6134 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6135 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6136 case SQRT: case FFS: case POPCOUNT:
6137 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6138 break;
6140 case VEC_SELECT:
6141 /* VEC_SELECT's second argument is a PARALLEL with integers that
6142 describe the elements selected. On ia64, those integers are
6143 always constants. Avoid walking the PARALLEL so that we don't
6144 get confused with "normal" parallels and then die. */
6145 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6146 break;
6148 case UNSPEC:
6149 switch (XINT (x, 1))
6151 case UNSPEC_LTOFF_DTPMOD:
6152 case UNSPEC_LTOFF_DTPREL:
6153 case UNSPEC_DTPREL:
6154 case UNSPEC_LTOFF_TPREL:
6155 case UNSPEC_TPREL:
6156 case UNSPEC_PRED_REL_MUTEX:
6157 case UNSPEC_PIC_CALL:
6158 case UNSPEC_MF:
6159 case UNSPEC_FETCHADD_ACQ:
6160 case UNSPEC_BSP_VALUE:
6161 case UNSPEC_FLUSHRS:
6162 case UNSPEC_BUNDLE_SELECTOR:
6163 break;
6165 case UNSPEC_GR_SPILL:
6166 case UNSPEC_GR_RESTORE:
6168 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6169 HOST_WIDE_INT bit = (offset >> 3) & 63;
6171 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6172 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6173 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6174 new_flags, pred);
6175 break;
6178 case UNSPEC_FR_SPILL:
6179 case UNSPEC_FR_RESTORE:
6180 case UNSPEC_GETF_EXP:
6181 case UNSPEC_SETF_EXP:
6182 case UNSPEC_ADDP4:
6183 case UNSPEC_FR_SQRT_RECIP_APPROX:
6184 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6185 case UNSPEC_LDA:
6186 case UNSPEC_LDS:
6187 case UNSPEC_LDS_A:
6188 case UNSPEC_LDSA:
6189 case UNSPEC_CHKACLR:
6190 case UNSPEC_CHKS:
6191 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6192 break;
6194 case UNSPEC_FR_RECIP_APPROX:
6195 case UNSPEC_SHRP:
6196 case UNSPEC_COPYSIGN:
6197 case UNSPEC_FR_RECIP_APPROX_RES:
6198 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6199 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6200 break;
6202 case UNSPEC_CMPXCHG_ACQ:
6203 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6204 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6205 break;
6207 default:
6208 gcc_unreachable ();
6210 break;
6212 case UNSPEC_VOLATILE:
6213 switch (XINT (x, 1))
6215 case UNSPECV_ALLOC:
6216 /* Alloc must always be the first instruction of a group.
6217 We force this by always returning true. */
6218 /* ??? We might get better scheduling if we explicitly check for
6219 input/local/output register dependencies, and modify the
6220 scheduler so that alloc is always reordered to the start of
6221 the current group. We could then eliminate all of the
6222 first_instruction code. */
6223 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6225 new_flags.is_write = 1;
6226 rws_access_regno (REG_AR_CFM, new_flags, pred);
6227 return 1;
6229 case UNSPECV_SET_BSP:
6230 need_barrier = 1;
6231 break;
6233 case UNSPECV_BLOCKAGE:
6234 case UNSPECV_INSN_GROUP_BARRIER:
6235 case UNSPECV_BREAK:
6236 case UNSPECV_PSAC_ALL:
6237 case UNSPECV_PSAC_NORMAL:
6238 return 0;
6240 default:
6241 gcc_unreachable ();
6243 break;
6245 case RETURN:
6246 new_flags.is_write = 0;
6247 need_barrier = rws_access_regno (REG_RP, flags, pred);
6248 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6250 new_flags.is_write = 1;
6251 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6252 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6253 break;
6255 default:
6256 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6257 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6258 switch (format_ptr[i])
6260 case '0': /* unused field */
6261 case 'i': /* integer */
6262 case 'n': /* note */
6263 case 'w': /* wide integer */
6264 case 's': /* pointer to string */
6265 case 'S': /* optional pointer to string */
6266 break;
6268 case 'e':
6269 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6270 need_barrier = 1;
6271 break;
6273 case 'E':
6274 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6275 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6276 need_barrier = 1;
6277 break;
6279 default:
6280 gcc_unreachable ();
6282 break;
6284 return need_barrier;
6287 /* Clear out the state for group_barrier_needed at the start of a
6288 sequence of insns. */
6290 static void
6291 init_insn_group_barriers (void)
6293 memset (rws_sum, 0, sizeof (rws_sum));
6294 first_instruction = 1;
6297 /* Given the current state, determine whether a group barrier (a stop bit) is
6298 necessary before INSN. Return nonzero if so. This modifies the state to
6299 include the effects of INSN as a side-effect. */
6301 static int
6302 group_barrier_needed (rtx insn)
6304 rtx pat;
6305 int need_barrier = 0;
6306 struct reg_flags flags;
6308 memset (&flags, 0, sizeof (flags));
6309 switch (GET_CODE (insn))
6311 case NOTE:
6312 case DEBUG_INSN:
6313 break;
6315 case BARRIER:
6316 /* A barrier doesn't imply an instruction group boundary. */
6317 break;
6319 case CODE_LABEL:
6320 memset (rws_insn, 0, sizeof (rws_insn));
6321 return 1;
6323 case CALL_INSN:
6324 flags.is_branch = 1;
6325 flags.is_sibcall = SIBLING_CALL_P (insn);
6326 memset (rws_insn, 0, sizeof (rws_insn));
6328 /* Don't bundle a call following another call. */
6329 if ((pat = prev_active_insn (insn))
6330 && GET_CODE (pat) == CALL_INSN)
6332 need_barrier = 1;
6333 break;
6336 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6337 break;
6339 case JUMP_INSN:
6340 if (!ia64_spec_check_p (insn))
6341 flags.is_branch = 1;
6343 /* Don't bundle a jump following a call. */
6344 if ((pat = prev_active_insn (insn))
6345 && GET_CODE (pat) == CALL_INSN)
6347 need_barrier = 1;
6348 break;
6350 /* FALLTHRU */
6352 case INSN:
6353 if (GET_CODE (PATTERN (insn)) == USE
6354 || GET_CODE (PATTERN (insn)) == CLOBBER)
6355 /* Don't care about USE and CLOBBER "insns"---those are used to
6356 indicate to the optimizer that it shouldn't get rid of
6357 certain operations. */
6358 break;
6360 pat = PATTERN (insn);
6362 /* Ug. Hack hacks hacked elsewhere. */
6363 switch (recog_memoized (insn))
6365 /* We play dependency tricks with the epilogue in order
6366 to get proper schedules. Undo this for dv analysis. */
6367 case CODE_FOR_epilogue_deallocate_stack:
6368 case CODE_FOR_prologue_allocate_stack:
6369 pat = XVECEXP (pat, 0, 0);
6370 break;
6372 /* The pattern we use for br.cloop confuses the code above.
6373 The second element of the vector is representative. */
6374 case CODE_FOR_doloop_end_internal:
6375 pat = XVECEXP (pat, 0, 1);
6376 break;
6378 /* Doesn't generate code. */
6379 case CODE_FOR_pred_rel_mutex:
6380 case CODE_FOR_prologue_use:
6381 return 0;
6383 default:
6384 break;
6387 memset (rws_insn, 0, sizeof (rws_insn));
6388 need_barrier = rtx_needs_barrier (pat, flags, 0);
6390 /* Check to see if the previous instruction was a volatile
6391 asm. */
6392 if (! need_barrier)
6393 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6395 break;
6397 default:
6398 gcc_unreachable ();
6401 if (first_instruction && INSN_P (insn)
6402 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6403 && GET_CODE (PATTERN (insn)) != USE
6404 && GET_CODE (PATTERN (insn)) != CLOBBER)
6406 need_barrier = 0;
6407 first_instruction = 0;
6410 return need_barrier;
6413 /* Like group_barrier_needed, but do not clobber the current state. */
6415 static int
6416 safe_group_barrier_needed (rtx insn)
6418 int saved_first_instruction;
6419 int t;
6421 saved_first_instruction = first_instruction;
6422 in_safe_group_barrier = 1;
6424 t = group_barrier_needed (insn);
6426 first_instruction = saved_first_instruction;
6427 in_safe_group_barrier = 0;
6429 return t;
6432 /* Scan the current function and insert stop bits as necessary to
6433 eliminate dependencies. This function assumes that a final
6434 instruction scheduling pass has been run which has already
6435 inserted most of the necessary stop bits. This function only
6436 inserts new ones at basic block boundaries, since these are
6437 invisible to the scheduler. */
6439 static void
6440 emit_insn_group_barriers (FILE *dump)
6442 rtx insn;
6443 rtx last_label = 0;
6444 int insns_since_last_label = 0;
6446 init_insn_group_barriers ();
6448 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6450 if (GET_CODE (insn) == CODE_LABEL)
6452 if (insns_since_last_label)
6453 last_label = insn;
6454 insns_since_last_label = 0;
6456 else if (GET_CODE (insn) == NOTE
6457 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6459 if (insns_since_last_label)
6460 last_label = insn;
6461 insns_since_last_label = 0;
6463 else if (GET_CODE (insn) == INSN
6464 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6465 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6467 init_insn_group_barriers ();
6468 last_label = 0;
6470 else if (NONDEBUG_INSN_P (insn))
6472 insns_since_last_label = 1;
6474 if (group_barrier_needed (insn))
6476 if (last_label)
6478 if (dump)
6479 fprintf (dump, "Emitting stop before label %d\n",
6480 INSN_UID (last_label));
6481 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6482 insn = last_label;
6484 init_insn_group_barriers ();
6485 last_label = 0;
6492 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6493 This function has to emit all necessary group barriers. */
6495 static void
6496 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6498 rtx insn;
6500 init_insn_group_barriers ();
6502 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6504 if (GET_CODE (insn) == BARRIER)
6506 rtx last = prev_active_insn (insn);
6508 if (! last)
6509 continue;
6510 if (GET_CODE (last) == JUMP_INSN
6511 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6512 last = prev_active_insn (last);
6513 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6514 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6516 init_insn_group_barriers ();
6518 else if (NONDEBUG_INSN_P (insn))
6520 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6521 init_insn_group_barriers ();
6522 else if (group_barrier_needed (insn))
6524 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6525 init_insn_group_barriers ();
6526 group_barrier_needed (insn);
6534 /* Instruction scheduling support. */
6536 #define NR_BUNDLES 10
6538 /* A list of names of all available bundles. */
6540 static const char *bundle_name [NR_BUNDLES] =
6542 ".mii",
6543 ".mmi",
6544 ".mfi",
6545 ".mmf",
6546 #if NR_BUNDLES == 10
6547 ".bbb",
6548 ".mbb",
6549 #endif
6550 ".mib",
6551 ".mmb",
6552 ".mfb",
6553 ".mlx"
6556 /* Nonzero if we should insert stop bits into the schedule. */
6558 int ia64_final_schedule = 0;
6560 /* Codes of the corresponding queried units: */
6562 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6563 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6565 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6566 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6568 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6570 /* The following variable value is an insn group barrier. */
6572 static rtx dfa_stop_insn;
6574 /* The following variable value is the last issued insn. */
6576 static rtx last_scheduled_insn;
6578 /* The following variable value is a pointer to a DFA state used as
6579    a temporary variable.  */
6581 static state_t temp_dfa_state = NULL;
6583 /* The following variable value is the DFA state after issuing the last
6584    insn.  */
6586 static state_t prev_cycle_state = NULL;
6588 /* The following array element values are TRUE if the corresponding
6589    insn requires stop bits to be added before it.  */
6591 static char *stops_p = NULL;
6593 /* The following variable is used to set up the array mentioned above.  */
6595 static int stop_before_p = 0;
6597 /* The following variable value is the length of the arrays `clocks' and
6598    `add_cycles'.  */
6600 static int clocks_length;
6602 /* The following variable value is the number of data speculations in progress.  */
6603 static int pending_data_specs = 0;
6605 /* Number of memory references on the current and three future processor cycles.  */
6606 static char mem_ops_in_group[4];
6608 /* Number of the current processor cycle (from the scheduler's point of view).  */
6609 static int current_cycle;
6611 static rtx ia64_single_set (rtx);
6612 static void ia64_emit_insn_before (rtx, rtx);
6614 /* Map a bundle number to its pseudo-op. */
6616 const char *
6617 get_bundle_name (int b)
6619 return bundle_name[b];
6623 /* Return the maximum number of instructions a cpu can issue. */
6625 static int
6626 ia64_issue_rate (void)
6628 return 6;
6631 /* Helper function - like single_set, but look inside COND_EXEC. */
6633 static rtx
6634 ia64_single_set (rtx insn)
6636 rtx x = PATTERN (insn), ret;
6637 if (GET_CODE (x) == COND_EXEC)
6638 x = COND_EXEC_CODE (x);
6639 if (GET_CODE (x) == SET)
6640 return x;
6642   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6643      Although they are not a classical single set, the second set is there
6644      just to keep them from moving past FP-relative stack accesses.  */
6645 switch (recog_memoized (insn))
6647 case CODE_FOR_prologue_allocate_stack:
6648 case CODE_FOR_epilogue_deallocate_stack:
6649 ret = XVECEXP (x, 0, 0);
6650 break;
6652 default:
6653 ret = single_set_2 (insn, x);
6654 break;
6657 return ret;
6660 /* Adjust the cost of a scheduling dependency.
6661    Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6662    COST is the current cost, DW is the dependency weakness.  */
6663 static int
6664 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6666 enum reg_note dep_type = (enum reg_note) dep_type1;
6667 enum attr_itanium_class dep_class;
6668 enum attr_itanium_class insn_class;
6670 insn_class = ia64_safe_itanium_class (insn);
6671 dep_class = ia64_safe_itanium_class (dep_insn);
6673 /* Treat true memory dependencies separately. Ignore apparent true
6674 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6675 if (dep_type == REG_DEP_TRUE
6676 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6677 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6678 return 0;
6680 if (dw == MIN_DEP_WEAK)
6681 /* Store and load are likely to alias, use higher cost to avoid stall. */
6682 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6683 else if (dw > MIN_DEP_WEAK)
6685 /* Store and load are less likely to alias. */
6686 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6687 /* Assume there will be no cache conflict for floating-point data.
6688 For integer data, L1 conflict penalty is huge (17 cycles), so we
6689 never assume it will not cause a conflict. */
6690 return 0;
6691 else
6692 return cost;
6695 if (dep_type != REG_DEP_OUTPUT)
6696 return cost;
6698 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6699 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6700 return 0;
6702 return cost;
6705 /* Like emit_insn_before, but skip cycle_display notes.
6706 ??? When cycle display notes are implemented, update this. */
6708 static void
6709 ia64_emit_insn_before (rtx insn, rtx before)
6711 emit_insn_before (insn, before);
6714 /* The following function marks insns that produce addresses for load
6715    and store insns.  Such insns will be placed into M slots because this
6716    decreases latency for Itanium 1 (see the function
6717    `ia64_produce_address_p' and the DFA descriptions).  */
6719 static void
6720 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6722 rtx insn, next, next_tail;
6724 /* Before reload, which_alternative is not set, which means that
6725 ia64_safe_itanium_class will produce wrong results for (at least)
6726 move instructions. */
6727 if (!reload_completed)
6728 return;
6730 next_tail = NEXT_INSN (tail);
6731 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6732 if (INSN_P (insn))
6733 insn->call = 0;
6734 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6735 if (INSN_P (insn)
6736 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6738 sd_iterator_def sd_it;
6739 dep_t dep;
6740 bool has_mem_op_consumer_p = false;
6742 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6744 enum attr_itanium_class c;
6746 if (DEP_TYPE (dep) != REG_DEP_TRUE)
6747 continue;
6749 next = DEP_CON (dep);
6750 c = ia64_safe_itanium_class (next);
6751 if ((c == ITANIUM_CLASS_ST
6752 || c == ITANIUM_CLASS_STF)
6753 && ia64_st_address_bypass_p (insn, next))
6755 has_mem_op_consumer_p = true;
6756 break;
6758 else if ((c == ITANIUM_CLASS_LD
6759 || c == ITANIUM_CLASS_FLD
6760 || c == ITANIUM_CLASS_FLDP)
6761 && ia64_ld_address_bypass_p (insn, next))
6763 has_mem_op_consumer_p = true;
6764 break;
6768 insn->call = has_mem_op_consumer_p;
6772 /* We're beginning a new block. Initialize data structures as necessary. */
6774 static void
6775 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6776 int sched_verbose ATTRIBUTE_UNUSED,
6777 int max_ready ATTRIBUTE_UNUSED)
6779 #ifdef ENABLE_CHECKING
6780 rtx insn;
6782 if (!sel_sched_p () && reload_completed)
6783 for (insn = NEXT_INSN (current_sched_info->prev_head);
6784 insn != current_sched_info->next_tail;
6785 insn = NEXT_INSN (insn))
6786 gcc_assert (!SCHED_GROUP_P (insn));
6787 #endif
6788 last_scheduled_insn = NULL_RTX;
6789 init_insn_group_barriers ();
6791 current_cycle = 0;
6792 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
6795 /* We're beginning a scheduling pass. Check assertion. */
6797 static void
6798 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6799 int sched_verbose ATTRIBUTE_UNUSED,
6800 int max_ready ATTRIBUTE_UNUSED)
6802 gcc_assert (pending_data_specs == 0);
6805 /* Scheduling pass is now finished. Free/reset static variable. */
6806 static void
6807 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6808 int sched_verbose ATTRIBUTE_UNUSED)
6810 gcc_assert (pending_data_specs == 0);
6813 /* Return TRUE if INSN is a load (either normal or speculative, but not a
6814 speculation check), FALSE otherwise. */
6815 static bool
6816 is_load_p (rtx insn)
6818 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6820 return
6821 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6822 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6825 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
6826    array (taking into account the 3-cycle cache reference postponement for
6827    stores: Intel Itanium 2 Reference Manual for Software Development and
6828    Optimization, 6.7.3.1).  */
6829 static void
6830 record_memory_reference (rtx insn)
6832 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6834 switch (insn_class) {
6835 case ITANIUM_CLASS_FLD:
6836 case ITANIUM_CLASS_LD:
6837 mem_ops_in_group[current_cycle % 4]++;
6838 break;
6839 case ITANIUM_CLASS_STF:
6840 case ITANIUM_CLASS_ST:
6841 mem_ops_in_group[(current_cycle + 3) % 4]++;
6842 break;
6843 default:;
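/* An illustrative sketch (not compiled) of the accounting used above:
   MEM_OPS_IN_GROUP is a four-entry ring indexed by cycle modulo 4, and
   stores are charged three cycles later than loads, per the manual
   section cited before record_memory_reference.  */
#if 0
static char toy_mem_ops[4];

static void
toy_record_memory_reference (int cycle, int is_store)
{
  /* Loads count against CYCLE; stores count against CYCLE + 3.  */
  toy_mem_ops[(cycle + (is_store ? 3 : 0)) % 4]++;
}
#endif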
6847 /* We are about to begin issuing insns for this clock cycle.
6848 Override the default sort algorithm to better slot instructions. */
6850 static int
6851 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6852 int *pn_ready, int clock_var,
6853 int reorder_type)
6855 int n_asms;
6856 int n_ready = *pn_ready;
6857 rtx *e_ready = ready + n_ready;
6858 rtx *insnp;
6860 if (sched_verbose)
6861 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6863 if (reorder_type == 0)
6865 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6866 n_asms = 0;
6867 for (insnp = ready; insnp < e_ready; insnp++)
6868 if (insnp < e_ready)
6870 rtx insn = *insnp;
6871 enum attr_type t = ia64_safe_type (insn);
6872 if (t == TYPE_UNKNOWN)
6874 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6875 || asm_noperands (PATTERN (insn)) >= 0)
6877 rtx lowest = ready[n_asms];
6878 ready[n_asms] = insn;
6879 *insnp = lowest;
6880 n_asms++;
6882 else
6884 rtx highest = ready[n_ready - 1];
6885 ready[n_ready - 1] = insn;
6886 *insnp = highest;
6887 return 1;
6892 if (n_asms < n_ready)
6894 /* Some normal insns to process. Skip the asms. */
6895 ready += n_asms;
6896 n_ready -= n_asms;
6898 else if (n_ready > 0)
6899 return 1;
6902 if (ia64_final_schedule)
6904 int deleted = 0;
6905 int nr_need_stop = 0;
6907 for (insnp = ready; insnp < e_ready; insnp++)
6908 if (safe_group_barrier_needed (*insnp))
6909 nr_need_stop++;
6911 if (reorder_type == 1 && n_ready == nr_need_stop)
6912 return 0;
6913 if (reorder_type == 0)
6914 return 1;
6915 insnp = e_ready;
6916 /* Move down everything that needs a stop bit, preserving
6917 relative order. */
6918 while (insnp-- > ready + deleted)
6919 while (insnp >= ready + deleted)
6921 rtx insn = *insnp;
6922 if (! safe_group_barrier_needed (insn))
6923 break;
6924 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6925 *ready = insn;
6926 deleted++;
6928 n_ready -= deleted;
6929 ready += deleted;
6932 current_cycle = clock_var;
6933 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6935 int moved = 0;
6937 insnp = e_ready;
6938 /* Move down loads/stores, preserving relative order. */
6939 while (insnp-- > ready + moved)
6940 while (insnp >= ready + moved)
6942 rtx insn = *insnp;
6943 if (! is_load_p (insn))
6944 break;
6945 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6946 *ready = insn;
6947 moved++;
6949 n_ready -= moved;
6950 ready += moved;
6953 return 1;
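/* An illustrative sketch (not compiled) of the idiom shared by the two
   loops above: scan READY from the top and rotate every insn that
   satisfies a predicate down to slot 0 with memmove, which pushes those
   insns to the low-priority end while preserving their relative order.
   The MATCHES callback is hypothetical; the real loops test
   safe_group_barrier_needed or is_load_p directly.  */
#if 0
static void
toy_move_matching_down (rtx *ready, int n_ready, int (*matches) (rtx))
{
  int moved = 0;
  rtx *insnp = ready + n_ready;

  while (insnp-- > ready + moved)
    while (insnp >= ready + moved)
      {
        rtx insn = *insnp;
        if (! matches (insn))
          break;
        memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
        *ready = insn;
        moved++;
      }
}
#endif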
6956 /* We are about to begin issuing insns for this clock cycle.  Override
6957 the default sort algorithm to better slot instructions. */
6959 static int
6960 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6961 int clock_var)
6963 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6964 pn_ready, clock_var, 0);
6967 /* Like ia64_sched_reorder, but called after issuing each insn.
6968 Override the default sort algorithm to better slot instructions. */
6970 static int
6971 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6972 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6973 int *pn_ready, int clock_var)
6975 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6976 clock_var, 1);
6979 /* We are about to issue INSN. Return the number of insns left on the
6980 ready queue that can be issued this cycle. */
6982 static int
6983 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6984 int sched_verbose ATTRIBUTE_UNUSED,
6985 rtx insn ATTRIBUTE_UNUSED,
6986 int can_issue_more ATTRIBUTE_UNUSED)
6988 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
6989     /* Modulo scheduling does not extend h_i_d when emitting
6990        new instructions.  Don't use h_i_d if we don't have to.  */
6992 if (DONE_SPEC (insn) & BEGIN_DATA)
6993 pending_data_specs++;
6994 if (CHECK_SPEC (insn) & BEGIN_DATA)
6995 pending_data_specs--;
6998 if (DEBUG_INSN_P (insn))
6999 return 1;
7001 last_scheduled_insn = insn;
7002 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7003 if (reload_completed)
7005 int needed = group_barrier_needed (insn);
7007 gcc_assert (!needed);
7008 if (GET_CODE (insn) == CALL_INSN)
7009 init_insn_group_barriers ();
7010 stops_p [INSN_UID (insn)] = stop_before_p;
7011 stop_before_p = 0;
7013 record_memory_reference (insn);
7015 return 1;
7018 /* We are choosing an insn from the ready queue.  Return nonzero if INSN
7019    can be chosen.  */
7021 static int
7022 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7024 gcc_assert (insn && INSN_P (insn));
7025 return ((!reload_completed
7026 || !safe_group_barrier_needed (insn))
7027 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7028 && (!mflag_sched_mem_insns_hard_limit
7029 || !is_load_p (insn)
7030 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7033 /* We are choosing an insn from the ready queue.  Return nonzero if INSN
7034    can be chosen.  */
7036 static bool
7037 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7039 gcc_assert (insn && INSN_P (insn));
7040   /* The size of the ALAT is 32.  Since we perform conservative data
7041      speculation, we keep the ALAT half-empty.  */
7042 return (pending_data_specs < 16
7043 || !(TODO_SPEC (insn) & BEGIN_DATA));
7046 /* The following variable value is a pseudo-insn used by the DFA insn
7047    scheduler to change the DFA state when the simulated clock is
7048    increased.  */
7050 static rtx dfa_pre_cycle_insn;
7052 /* Returns 1 when a meaningful insn was scheduled between the last group
7053 barrier and LAST. */
7054 static int
7055 scheduled_good_insn (rtx last)
7057 if (last && recog_memoized (last) >= 0)
7058 return 1;
7060 for ( ;
7061 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7062 && !stops_p[INSN_UID (last)];
7063 last = PREV_INSN (last))
7064 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7065 the ebb we're scheduling. */
7066 if (INSN_P (last) && recog_memoized (last) >= 0)
7067 return 1;
7069 return 0;
7072 /* We are about to begin issuing INSN.  Return nonzero if we cannot
7073    issue it on the given cycle CLOCK, and return zero if we should not
7074    sort the ready queue on the next clock start.  */
7076 static int
7077 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7078 int clock, int *sort_p)
7080 gcc_assert (insn && INSN_P (insn));
7082 if (DEBUG_INSN_P (insn))
7083 return 0;
7085 /* When a group barrier is needed for insn, last_scheduled_insn
7086 should be set. */
7087 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7088 || last_scheduled_insn);
7090 if ((reload_completed
7091 && (safe_group_barrier_needed (insn)
7092 || (mflag_sched_stop_bits_after_every_cycle
7093 && last_clock != clock
7094 && last_scheduled_insn
7095 && scheduled_good_insn (last_scheduled_insn))))
7096 || (last_scheduled_insn
7097 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7098 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7099 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7101 init_insn_group_barriers ();
7103 if (verbose && dump)
7104 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7105 last_clock == clock ? " + cycle advance" : "");
7107 stop_before_p = 1;
7108 current_cycle = clock;
7109 mem_ops_in_group[current_cycle % 4] = 0;
7111 if (last_clock == clock)
7113 state_transition (curr_state, dfa_stop_insn);
7114 if (TARGET_EARLY_STOP_BITS)
7115 *sort_p = (last_scheduled_insn == NULL_RTX
7116 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7117 else
7118 *sort_p = 0;
7119 return 1;
7122 if (last_scheduled_insn)
7124 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7125 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7126 state_reset (curr_state);
7127 else
7129 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7130 state_transition (curr_state, dfa_stop_insn);
7131 state_transition (curr_state, dfa_pre_cycle_insn);
7132 state_transition (curr_state, NULL);
7136 return 0;
7139 /* Implement targetm.sched.h_i_d_extended hook.
7140 Extend internal data structures. */
7141 static void
7142 ia64_h_i_d_extended (void)
7144 if (stops_p != NULL)
7146 int new_clocks_length = get_max_uid () * 3 / 2;
7147 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7148 clocks_length = new_clocks_length;
7153 /* This structure describes the data used by the backend to guide scheduling.
7154 When the current scheduling point is switched, this data should be saved
7155 and restored later, if the scheduler returns to this point. */
7156 struct _ia64_sched_context
7158 state_t prev_cycle_state;
7159 rtx last_scheduled_insn;
7160 struct reg_write_state rws_sum[NUM_REGS];
7161 struct reg_write_state rws_insn[NUM_REGS];
7162 int first_instruction;
7163 int pending_data_specs;
7164 int current_cycle;
7165 char mem_ops_in_group[4];
7167 typedef struct _ia64_sched_context *ia64_sched_context_t;
7169 /* Allocates a scheduling context. */
7170 static void *
7171 ia64_alloc_sched_context (void)
7173 return xmalloc (sizeof (struct _ia64_sched_context));
7176 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7177 the global context otherwise. */
7178 static void
7179 ia64_init_sched_context (void *_sc, bool clean_p)
7181 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7183 sc->prev_cycle_state = xmalloc (dfa_state_size);
7184 if (clean_p)
7186 state_reset (sc->prev_cycle_state);
7187 sc->last_scheduled_insn = NULL_RTX;
7188 memset (sc->rws_sum, 0, sizeof (rws_sum));
7189 memset (sc->rws_insn, 0, sizeof (rws_insn));
7190 sc->first_instruction = 1;
7191 sc->pending_data_specs = 0;
7192 sc->current_cycle = 0;
7193 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7195 else
7197 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7198 sc->last_scheduled_insn = last_scheduled_insn;
7199 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7200 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7201 sc->first_instruction = first_instruction;
7202 sc->pending_data_specs = pending_data_specs;
7203 sc->current_cycle = current_cycle;
7204 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7208 /* Sets the global scheduling context to the one pointed to by _SC. */
7209 static void
7210 ia64_set_sched_context (void *_sc)
7212 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7214 gcc_assert (sc != NULL);
7216 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7217 last_scheduled_insn = sc->last_scheduled_insn;
7218 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7219 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7220 first_instruction = sc->first_instruction;
7221 pending_data_specs = sc->pending_data_specs;
7222 current_cycle = sc->current_cycle;
7223 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7226 /* Clears the data in the _SC scheduling context. */
7227 static void
7228 ia64_clear_sched_context (void *_sc)
7230 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7232 free (sc->prev_cycle_state);
7233 sc->prev_cycle_state = NULL;
7236 /* Frees the _SC scheduling context. */
7237 static void
7238 ia64_free_sched_context (void *_sc)
7240 gcc_assert (_sc != NULL);
7242 free (_sc);
7245 typedef rtx (* gen_func_t) (rtx, rtx);
7247 /* Return a function that will generate a load of mode MODE_NO
7248 with speculation types TS. */
7249 static gen_func_t
7250 get_spec_load_gen_function (ds_t ts, int mode_no)
7252 static gen_func_t gen_ld_[] = {
7253 gen_movbi,
7254 gen_movqi_internal,
7255 gen_movhi_internal,
7256 gen_movsi_internal,
7257 gen_movdi_internal,
7258 gen_movsf_internal,
7259 gen_movdf_internal,
7260 gen_movxf_internal,
7261 gen_movti_internal,
7262 gen_zero_extendqidi2,
7263 gen_zero_extendhidi2,
7264 gen_zero_extendsidi2,
7267 static gen_func_t gen_ld_a[] = {
7268 gen_movbi_advanced,
7269 gen_movqi_advanced,
7270 gen_movhi_advanced,
7271 gen_movsi_advanced,
7272 gen_movdi_advanced,
7273 gen_movsf_advanced,
7274 gen_movdf_advanced,
7275 gen_movxf_advanced,
7276 gen_movti_advanced,
7277 gen_zero_extendqidi2_advanced,
7278 gen_zero_extendhidi2_advanced,
7279 gen_zero_extendsidi2_advanced,
7281 static gen_func_t gen_ld_s[] = {
7282 gen_movbi_speculative,
7283 gen_movqi_speculative,
7284 gen_movhi_speculative,
7285 gen_movsi_speculative,
7286 gen_movdi_speculative,
7287 gen_movsf_speculative,
7288 gen_movdf_speculative,
7289 gen_movxf_speculative,
7290 gen_movti_speculative,
7291 gen_zero_extendqidi2_speculative,
7292 gen_zero_extendhidi2_speculative,
7293 gen_zero_extendsidi2_speculative,
7295 static gen_func_t gen_ld_sa[] = {
7296 gen_movbi_speculative_advanced,
7297 gen_movqi_speculative_advanced,
7298 gen_movhi_speculative_advanced,
7299 gen_movsi_speculative_advanced,
7300 gen_movdi_speculative_advanced,
7301 gen_movsf_speculative_advanced,
7302 gen_movdf_speculative_advanced,
7303 gen_movxf_speculative_advanced,
7304 gen_movti_speculative_advanced,
7305 gen_zero_extendqidi2_speculative_advanced,
7306 gen_zero_extendhidi2_speculative_advanced,
7307 gen_zero_extendsidi2_speculative_advanced,
7309 static gen_func_t gen_ld_s_a[] = {
7310 gen_movbi_speculative_a,
7311 gen_movqi_speculative_a,
7312 gen_movhi_speculative_a,
7313 gen_movsi_speculative_a,
7314 gen_movdi_speculative_a,
7315 gen_movsf_speculative_a,
7316 gen_movdf_speculative_a,
7317 gen_movxf_speculative_a,
7318 gen_movti_speculative_a,
7319 gen_zero_extendqidi2_speculative_a,
7320 gen_zero_extendhidi2_speculative_a,
7321 gen_zero_extendsidi2_speculative_a,
7324 gen_func_t *gen_ld;
7326 if (ts & BEGIN_DATA)
7328 if (ts & BEGIN_CONTROL)
7329 gen_ld = gen_ld_sa;
7330 else
7331 gen_ld = gen_ld_a;
7333 else if (ts & BEGIN_CONTROL)
7335 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7336 || ia64_needs_block_p (ts))
7337 gen_ld = gen_ld_s;
7338 else
7339 gen_ld = gen_ld_s_a;
7341 else if (ts == 0)
7342 gen_ld = gen_ld_;
7343 else
7344 gcc_unreachable ();
7346 return gen_ld[mode_no];
7349 /* Constants that help map 'enum machine_mode' to int.  */
7350 enum SPEC_MODES
7352 SPEC_MODE_INVALID = -1,
7353 SPEC_MODE_FIRST = 0,
7354 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7355 SPEC_MODE_FOR_EXTEND_LAST = 3,
7356 SPEC_MODE_LAST = 8
7359 enum
7361 /* Offset to reach ZERO_EXTEND patterns. */
7362 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7365 /* Return the index of MODE.  */
7366 static int
7367 ia64_mode_to_int (enum machine_mode mode)
7369 switch (mode)
7371 case BImode: return 0; /* SPEC_MODE_FIRST */
7372 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7373 case HImode: return 2;
7374 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7375 case DImode: return 4;
7376 case SFmode: return 5;
7377 case DFmode: return 6;
7378 case XFmode: return 7;
7379 case TImode:
7380 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7381 mentioned in itanium[12].md. Predicate fp_register_operand also
7382 needs to be defined. Bottom line: better disable for now. */
7383 return SPEC_MODE_INVALID;
7384 default: return SPEC_MODE_INVALID;
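/* An illustrative sketch (not compiled) of how the index returned above
   combines with SPEC_GEN_EXTEND_OFFSET (8 here) to select an entry in the
   gen_ld_* tables of get_spec_load_gen_function: a zero-extending QImode
   load, for example, maps to index 1 + 8 == 9, the gen_zero_extendqidi2*
   slot.  */
#if 0
static int
toy_spec_table_index (enum machine_mode mode, int extend_p)
{
  int mode_no = ia64_mode_to_int (mode);

  if (mode_no == SPEC_MODE_INVALID)
    return -1;
  if (extend_p)
    {
      /* Only QImode, HImode and SImode loads have ZERO_EXTEND variants.  */
      if (mode_no < SPEC_MODE_FOR_EXTEND_FIRST
          || mode_no > SPEC_MODE_FOR_EXTEND_LAST)
        return -1;
      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }
  return mode_no;
}
#endif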
7388 /* Provide information about speculation capabilities. */
7389 static void
7390 ia64_set_sched_flags (spec_info_t spec_info)
7392 unsigned int *flags = &(current_sched_info->flags);
7394 if (*flags & SCHED_RGN
7395 || *flags & SCHED_EBB
7396 || *flags & SEL_SCHED)
7398 int mask = 0;
7400 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7401 || (mflag_sched_ar_data_spec && reload_completed))
7403 mask |= BEGIN_DATA;
7405 if (!sel_sched_p ()
7406 && ((mflag_sched_br_in_data_spec && !reload_completed)
7407 || (mflag_sched_ar_in_data_spec && reload_completed)))
7408 mask |= BE_IN_DATA;
7411 if (mflag_sched_control_spec
7412 && (!sel_sched_p ()
7413 || reload_completed))
7415 mask |= BEGIN_CONTROL;
7417 if (!sel_sched_p () && mflag_sched_in_control_spec)
7418 mask |= BE_IN_CONTROL;
7421 spec_info->mask = mask;
7423 if (mask)
7425 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7427 if (mask & BE_IN_SPEC)
7428 *flags |= NEW_BBS;
7430 spec_info->flags = 0;
7432 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7433 spec_info->flags |= PREFER_NON_DATA_SPEC;
7435 if (mask & CONTROL_SPEC)
7437 if (mflag_sched_prefer_non_control_spec_insns)
7438 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7440 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7441 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7444 if (sched_verbose >= 1)
7445 spec_info->dump = sched_dump;
7446 else
7447 spec_info->dump = 0;
7449 if (mflag_sched_count_spec_in_critical_path)
7450 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7453 else
7454 spec_info->mask = 0;
7457 /* If INSN is an appropriate load, return the index of its mode.
7458    Return -1 otherwise.  */
7459 static int
7460 get_mode_no_for_insn (rtx insn)
7462 rtx reg, mem, mode_rtx;
7463 int mode_no;
7464 bool extend_p;
7466 extract_insn_cached (insn);
7468 /* We use WHICH_ALTERNATIVE only after reload. This will
7469 guarantee that reload won't touch a speculative insn. */
7471 if (recog_data.n_operands != 2)
7472 return -1;
7474 reg = recog_data.operand[0];
7475 mem = recog_data.operand[1];
7477 /* We should use MEM's mode since REG's mode in presence of
7478 ZERO_EXTEND will always be DImode. */
7479 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7480 /* Process non-speculative ld. */
7482 if (!reload_completed)
7484 /* Do not speculate into regs like ar.lc. */
7485 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7486 return -1;
7488 if (!MEM_P (mem))
7489 return -1;
7492 rtx mem_reg = XEXP (mem, 0);
7494 if (!REG_P (mem_reg))
7495 return -1;
7498 mode_rtx = mem;
7500 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7502 gcc_assert (REG_P (reg) && MEM_P (mem));
7503 mode_rtx = mem;
7505 else
7506 return -1;
7508 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7509 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7510 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7511 /* Process speculative ld or ld.c. */
7513 gcc_assert (REG_P (reg) && MEM_P (mem));
7514 mode_rtx = mem;
7516 else
7518 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7520 if (attr_class == ITANIUM_CLASS_CHK_A
7521 || attr_class == ITANIUM_CLASS_CHK_S_I
7522 || attr_class == ITANIUM_CLASS_CHK_S_F)
7523 /* Process chk. */
7524 mode_rtx = reg;
7525 else
7526 return -1;
7529 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7531 if (mode_no == SPEC_MODE_INVALID)
7532 return -1;
7534 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7536 if (extend_p)
7538 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7539 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7540 return -1;
7542 mode_no += SPEC_GEN_EXTEND_OFFSET;
7545 return mode_no;
7548 /* If X is an unspec part of a speculative load, return its code.
7549 Return -1 otherwise. */
7550 static int
7551 get_spec_unspec_code (const_rtx x)
7553 if (GET_CODE (x) != UNSPEC)
7554 return -1;
7557 int code;
7559 code = XINT (x, 1);
7561 switch (code)
7563 case UNSPEC_LDA:
7564 case UNSPEC_LDS:
7565 case UNSPEC_LDS_A:
7566 case UNSPEC_LDSA:
7567 return code;
7569 default:
7570 return -1;
7575 /* Implement skip_rtx_p hook. */
7576 static bool
7577 ia64_skip_rtx_p (const_rtx x)
7579 return get_spec_unspec_code (x) != -1;
7582 /* If INSN is a speculative load, return its UNSPEC code.
7583 Return -1 otherwise. */
7584 static int
7585 get_insn_spec_code (const_rtx insn)
7587 rtx pat, reg, mem;
7589 pat = PATTERN (insn);
7591 if (GET_CODE (pat) == COND_EXEC)
7592 pat = COND_EXEC_CODE (pat);
7594 if (GET_CODE (pat) != SET)
7595 return -1;
7597 reg = SET_DEST (pat);
7598 if (!REG_P (reg))
7599 return -1;
7601 mem = SET_SRC (pat);
7602 if (GET_CODE (mem) == ZERO_EXTEND)
7603 mem = XEXP (mem, 0);
7605 return get_spec_unspec_code (mem);
7608 /* If INSN is a speculative load, return a ds with the speculation types.
7609 Otherwise [if INSN is a normal instruction] return 0. */
7610 static ds_t
7611 ia64_get_insn_spec_ds (rtx insn)
7613 int code = get_insn_spec_code (insn);
7615 switch (code)
7617 case UNSPEC_LDA:
7618 return BEGIN_DATA;
7620 case UNSPEC_LDS:
7621 case UNSPEC_LDS_A:
7622 return BEGIN_CONTROL;
7624 case UNSPEC_LDSA:
7625 return BEGIN_DATA | BEGIN_CONTROL;
7627 default:
7628 return 0;
7632 /* If INSN is a speculative load, return a ds with the speculation types that
7633 will be checked.
7634 Otherwise [if INSN is a normal instruction] return 0. */
7635 static ds_t
7636 ia64_get_insn_checked_ds (rtx insn)
7638 int code = get_insn_spec_code (insn);
7640 switch (code)
7642 case UNSPEC_LDA:
7643 return BEGIN_DATA | BEGIN_CONTROL;
7645 case UNSPEC_LDS:
7646 return BEGIN_CONTROL;
7648 case UNSPEC_LDS_A:
7649 case UNSPEC_LDSA:
7650 return BEGIN_DATA | BEGIN_CONTROL;
7652 default:
7653 return 0;
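/* Summarizing the two mappings above (drawn directly from the switches):
     UNSPEC_LDA    produces BEGIN_DATA,                 checked as BEGIN_DATA | BEGIN_CONTROL
     UNSPEC_LDS    produces BEGIN_CONTROL,              checked as BEGIN_CONTROL
     UNSPEC_LDS_A  produces BEGIN_CONTROL,              checked as BEGIN_DATA | BEGIN_CONTROL
     UNSPEC_LDSA   produces BEGIN_DATA | BEGIN_CONTROL, checked as BEGIN_DATA | BEGIN_CONTROL  */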
7657 /* Return a speculative pattern for INSN with speculative mode TS and
7658    machine mode MODE_NO.  MODE_NO already encodes whether a ZERO_EXTEND
7659    variant of the load is needed.  */
7661 static rtx
7662 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7664 rtx pat, new_pat;
7665 gen_func_t gen_load;
7667 gen_load = get_spec_load_gen_function (ts, mode_no);
7669 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7670 copy_rtx (recog_data.operand[1]));
7672 pat = PATTERN (insn);
7673 if (GET_CODE (pat) == COND_EXEC)
7674 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7675 new_pat);
7677 return new_pat;
7680 static bool
7681 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7682 ds_t ds ATTRIBUTE_UNUSED)
7684 return false;
7687 /* Implement targetm.sched.speculate_insn hook.
7688    Check whether INSN can be made TS speculative.
7689    If not, return -1.
7690    If so, generate the speculative pattern in NEW_PAT and return 1.
7691    If the current pattern of INSN already provides TS speculation,
7692    return 0.  */
7693 static int
7694 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7696 int mode_no;
7697 int res;
7699 gcc_assert (!(ts & ~SPECULATIVE));
7701 if (ia64_spec_check_p (insn))
7702 return -1;
7704 if ((ts & BE_IN_SPEC)
7705 && !insn_can_be_in_speculative_p (insn, ts))
7706 return -1;
7708 mode_no = get_mode_no_for_insn (insn);
7710 if (mode_no != SPEC_MODE_INVALID)
7712 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7713 res = 0;
7714 else
7716 res = 1;
7717 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7720 else
7721 res = -1;
7723 return res;
7726 /* Return a function that will generate a check for speculation TS with mode
7727    MODE_NO.
7728    If a simple check is needed, pass true for SIMPLE_CHECK_P.
7729    If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
7730 static gen_func_t
7731 get_spec_check_gen_function (ds_t ts, int mode_no,
7732 bool simple_check_p, bool clearing_check_p)
7734 static gen_func_t gen_ld_c_clr[] = {
7735 gen_movbi_clr,
7736 gen_movqi_clr,
7737 gen_movhi_clr,
7738 gen_movsi_clr,
7739 gen_movdi_clr,
7740 gen_movsf_clr,
7741 gen_movdf_clr,
7742 gen_movxf_clr,
7743 gen_movti_clr,
7744 gen_zero_extendqidi2_clr,
7745 gen_zero_extendhidi2_clr,
7746 gen_zero_extendsidi2_clr,
7748 static gen_func_t gen_ld_c_nc[] = {
7749 gen_movbi_nc,
7750 gen_movqi_nc,
7751 gen_movhi_nc,
7752 gen_movsi_nc,
7753 gen_movdi_nc,
7754 gen_movsf_nc,
7755 gen_movdf_nc,
7756 gen_movxf_nc,
7757 gen_movti_nc,
7758 gen_zero_extendqidi2_nc,
7759 gen_zero_extendhidi2_nc,
7760 gen_zero_extendsidi2_nc,
7762 static gen_func_t gen_chk_a_clr[] = {
7763 gen_advanced_load_check_clr_bi,
7764 gen_advanced_load_check_clr_qi,
7765 gen_advanced_load_check_clr_hi,
7766 gen_advanced_load_check_clr_si,
7767 gen_advanced_load_check_clr_di,
7768 gen_advanced_load_check_clr_sf,
7769 gen_advanced_load_check_clr_df,
7770 gen_advanced_load_check_clr_xf,
7771 gen_advanced_load_check_clr_ti,
7772 gen_advanced_load_check_clr_di,
7773 gen_advanced_load_check_clr_di,
7774 gen_advanced_load_check_clr_di,
7776 static gen_func_t gen_chk_a_nc[] = {
7777 gen_advanced_load_check_nc_bi,
7778 gen_advanced_load_check_nc_qi,
7779 gen_advanced_load_check_nc_hi,
7780 gen_advanced_load_check_nc_si,
7781 gen_advanced_load_check_nc_di,
7782 gen_advanced_load_check_nc_sf,
7783 gen_advanced_load_check_nc_df,
7784 gen_advanced_load_check_nc_xf,
7785 gen_advanced_load_check_nc_ti,
7786 gen_advanced_load_check_nc_di,
7787 gen_advanced_load_check_nc_di,
7788 gen_advanced_load_check_nc_di,
7790 static gen_func_t gen_chk_s[] = {
7791 gen_speculation_check_bi,
7792 gen_speculation_check_qi,
7793 gen_speculation_check_hi,
7794 gen_speculation_check_si,
7795 gen_speculation_check_di,
7796 gen_speculation_check_sf,
7797 gen_speculation_check_df,
7798 gen_speculation_check_xf,
7799 gen_speculation_check_ti,
7800 gen_speculation_check_di,
7801 gen_speculation_check_di,
7802 gen_speculation_check_di,
7805 gen_func_t *gen_check;
7807 if (ts & BEGIN_DATA)
7809 /* We don't need recovery because even if this is an ld.sa, an
7810    ALAT entry will be allocated only if the NAT bit is set to zero.
7811    So it is enough to use an ld.c here.  */
7813 if (simple_check_p)
7815 gcc_assert (mflag_sched_spec_ldc);
7817 if (clearing_check_p)
7818 gen_check = gen_ld_c_clr;
7819 else
7820 gen_check = gen_ld_c_nc;
7822 else
7824 if (clearing_check_p)
7825 gen_check = gen_chk_a_clr;
7826 else
7827 gen_check = gen_chk_a_nc;
7830 else if (ts & BEGIN_CONTROL)
7832 if (simple_check_p)
7833 /* We might want to use ld.sa -> ld.c instead of
7834 ld.s -> chk.s. */
7836 gcc_assert (!ia64_needs_block_p (ts));
7838 if (clearing_check_p)
7839 gen_check = gen_ld_c_clr;
7840 else
7841 gen_check = gen_ld_c_nc;
7843 else
7845 gen_check = gen_chk_s;
7848 else
7849 gcc_unreachable ();
7851 gcc_assert (mode_no >= 0);
7852 return gen_check[mode_no];
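/* Summary of the choice made by get_spec_check_gen_function () above:

     speculation kind   simple check?   emitted check
     BEGIN_DATA         yes             ld.c  (clearing or non-clearing)
     BEGIN_DATA         no              chk.a (clearing or non-clearing)
     BEGIN_CONTROL      yes             ld.c  (clearing or non-clearing)
     BEGIN_CONTROL      no              chk.s  */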
7855 /* Return nonzero if speculation of type TS requires a branchy recovery check.  */
7856 static bool
7857 ia64_needs_block_p (ds_t ts)
7859 if (ts & BEGIN_DATA)
7860 return !mflag_sched_spec_ldc;
7862 gcc_assert ((ts & BEGIN_CONTROL) != 0);
7864 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7867 /* Generate a recovery check for INSN.
7868    If LABEL is nonzero, generate a branchy recovery check.
7869    Otherwise, generate a simple check.  */
7870 static rtx
7871 ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7873 rtx op1, pat, check_pat;
7874 gen_func_t gen_check;
7875 int mode_no;
7877 mode_no = get_mode_no_for_insn (insn);
7878 gcc_assert (mode_no >= 0);
7880 if (label)
7881 op1 = label;
7882 else
7884 gcc_assert (!ia64_needs_block_p (ds));
7885 op1 = copy_rtx (recog_data.operand[1]);
7888 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7889 true);
7891 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
7893 pat = PATTERN (insn);
7894 if (GET_CODE (pat) == COND_EXEC)
7895 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7896 check_pat);
7898 return check_pat;
7901 /* Return nonzero if X is a branchy recovery check.  */
7902 static int
7903 ia64_spec_check_p (rtx x)
7905 x = PATTERN (x);
7906 if (GET_CODE (x) == COND_EXEC)
7907 x = COND_EXEC_CODE (x);
7908 if (GET_CODE (x) == SET)
7909 return ia64_spec_check_src_p (SET_SRC (x));
7910 return 0;
7913 /* Return nonzero if SRC belongs to a recovery check.  */
7914 static int
7915 ia64_spec_check_src_p (rtx src)
7917 if (GET_CODE (src) == IF_THEN_ELSE)
7919 rtx t;
7921 t = XEXP (src, 0);
7922 if (GET_CODE (t) == NE)
7924 t = XEXP (t, 0);
7926 if (GET_CODE (t) == UNSPEC)
7928 int code;
7930 code = XINT (t, 1);
7932 if (code == UNSPEC_LDCCLR
7933 || code == UNSPEC_LDCNC
7934 || code == UNSPEC_CHKACLR
7935 || code == UNSPEC_CHKANC
7936 || code == UNSPEC_CHKS)
7938 gcc_assert (code != 0);
7939 return code;
7944 return 0;
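/* Note that on success the value returned above is the matched (nonzero)
   UNSPEC code itself, so callers such as ia64_spec_check_p () can simply
   treat the result as a boolean.  */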
7948 /* The following page contains the abstract data type `bundle state', which
7949    is used for bundling insns (inserting nops and generating templates).  */
7951 /* The following describes the state of insn bundling.  */
7953 struct bundle_state
7955 /* Unique bundle state number to identify them in the debugging
7956 output */
7957 int unique_num;
7958 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
7959 /* number of nops before and after the insn */
7960 short before_nops_num, after_nops_num;
7961 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
7962 insn) */
7963 int cost; /* cost of the state in cycles */
7964 int accumulated_insns_num; /* number of all previous insns including
7965 nops; an L insn is counted as 2 insns */
7966 int branch_deviation; /* deviation of previous branches from 3rd slots */
7967 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
7968 struct bundle_state *next; /* next state with the same insn_num */
7969 struct bundle_state *originator; /* originator (previous insn state) */
7970 /* All bundle states are in the following chain. */
7971 struct bundle_state *allocated_states_chain;
7972 /* The DFA State after issuing the insn and the nops. */
7973 state_t dfa_state;
7976 /* The following maps an insn number to the corresponding bundle state.  */
7978 static struct bundle_state **index_to_bundle_states;
7980 /* The unique number of next bundle state. */
7982 static int bundle_states_num;
7984 /* All allocated bundle states are in the following chain. */
7986 static struct bundle_state *allocated_bundle_states_chain;
7988 /* All allocated but not used bundle states are in the following
7989 chain. */
7991 static struct bundle_state *free_bundle_state_chain;
7994 /* The following function returns a free bundle state. */
7996 static struct bundle_state *
7997 get_free_bundle_state (void)
7999 struct bundle_state *result;
8001 if (free_bundle_state_chain != NULL)
8003 result = free_bundle_state_chain;
8004 free_bundle_state_chain = result->next;
8006 else
8008 result = XNEW (struct bundle_state);
8009 result->dfa_state = xmalloc (dfa_state_size);
8010 result->allocated_states_chain = allocated_bundle_states_chain;
8011 allocated_bundle_states_chain = result;
8013 result->unique_num = bundle_states_num++;
8014 return result;
8018 /* The following function frees the given bundle state.  */
8020 static void
8021 free_bundle_state (struct bundle_state *state)
8023 state->next = free_bundle_state_chain;
8024 free_bundle_state_chain = state;
8027 /* Start work with abstract data `bundle states'. */
8029 static void
8030 initiate_bundle_states (void)
8032 bundle_states_num = 0;
8033 free_bundle_state_chain = NULL;
8034 allocated_bundle_states_chain = NULL;
8037 /* Finish work with abstract data `bundle states'. */
8039 static void
8040 finish_bundle_states (void)
8042 struct bundle_state *curr_state, *next_state;
8044 for (curr_state = allocated_bundle_states_chain;
8045 curr_state != NULL;
8046 curr_state = next_state)
8048 next_state = curr_state->allocated_states_chain;
8049 free (curr_state->dfa_state);
8050 free (curr_state);
8054 /* Hash table of the bundle states. The key is dfa_state and insn_num
8055 of the bundle states. */
8057 static htab_t bundle_state_table;
8059 /* The function returns hash of BUNDLE_STATE. */
8061 static unsigned
8062 bundle_state_hash (const void *bundle_state)
8064 const struct bundle_state *const state
8065 = (const struct bundle_state *) bundle_state;
8066 unsigned result, i;
8068 for (result = i = 0; i < dfa_state_size; i++)
8069 result += (((unsigned char *) state->dfa_state) [i]
8070 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8071 return result + state->insn_num;
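/* Note that this hash and bundle_state_eq_p () below agree on the key:
   both use the DFA state bytes together with insn_num, as required for
   the hash table to behave correctly.  */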
8074 /* The function returns nonzero if the bundle state keys are equal. */
8076 static int
8077 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8079 const struct bundle_state *const state1
8080 = (const struct bundle_state *) bundle_state_1;
8081 const struct bundle_state *const state2
8082 = (const struct bundle_state *) bundle_state_2;
8084 return (state1->insn_num == state2->insn_num
8085 && memcmp (state1->dfa_state, state2->dfa_state,
8086 dfa_state_size) == 0);
8089 /* The function inserts BUNDLE_STATE into the hash table.  The
8090    function returns nonzero if the bundle state has been inserted into
8091    the table.  The table contains the best bundle state with the given key.  */
8093 static int
8094 insert_bundle_state (struct bundle_state *bundle_state)
8096 void **entry_ptr;
8098 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8099 if (*entry_ptr == NULL)
8101 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8102 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8103 *entry_ptr = (void *) bundle_state;
8104 return TRUE;
8106 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8107 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8108 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8109 > bundle_state->accumulated_insns_num
8110 || (((struct bundle_state *)
8111 *entry_ptr)->accumulated_insns_num
8112 == bundle_state->accumulated_insns_num
8113 && (((struct bundle_state *)
8114 *entry_ptr)->branch_deviation
8115 > bundle_state->branch_deviation
8116 || (((struct bundle_state *)
8117 *entry_ptr)->branch_deviation
8118 == bundle_state->branch_deviation
8119 && ((struct bundle_state *)
8120 *entry_ptr)->middle_bundle_stops
8121 > bundle_state->middle_bundle_stops))))))
8124 struct bundle_state temp;
8126 temp = *(struct bundle_state *) *entry_ptr;
8127 *(struct bundle_state *) *entry_ptr = *bundle_state;
8128 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8129 *bundle_state = temp;
8131 return FALSE;
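/* Purely illustrative (not used anywhere): the comparison performed above is
   the following lexicographic order on (cost, accumulated_insns_num,
   branch_deviation, middle_bundle_stops), smaller being better.  */
static int ATTRIBUTE_UNUSED
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}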
8134 /* Start work with the hash table. */
8136 static void
8137 initiate_bundle_state_table (void)
8139 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8140 (htab_del) 0);
8143 /* Finish work with the hash table. */
8145 static void
8146 finish_bundle_state_table (void)
8148 htab_delete (bundle_state_table);
8153 /* The following variable is an insn `nop' used to check bundle states
8154    with different numbers of inserted nops.  */
8156 static rtx ia64_nop;
8158 /* The following function tries to issue NOPS_NUM nops for the current
8159    state without advancing the processor cycle.  If it fails, the
8160    function returns FALSE and frees the current state.  */
8162 static int
8163 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8165 int i;
8167 for (i = 0; i < nops_num; i++)
8168 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8170 free_bundle_state (curr_state);
8171 return FALSE;
8173 return TRUE;
8176 /* The following function tries to issue INSN for the current
8177    state without advancing the processor cycle.  If it fails, the
8178    function returns FALSE and frees the current state.  */
8180 static int
8181 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8183 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8185 free_bundle_state (curr_state);
8186 return FALSE;
8188 return TRUE;
8191 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8192    starting with ORIGINATOR without advancing the processor cycle.  If
8193    TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8194    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8195    If it is successful, the function creates a new bundle state and
8196    inserts it into the hash table and into `index_to_bundle_states'.  */
8198 static void
8199 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8200 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8202 struct bundle_state *curr_state;
8204 curr_state = get_free_bundle_state ();
8205 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8206 curr_state->insn = insn;
8207 curr_state->insn_num = originator->insn_num + 1;
8208 curr_state->cost = originator->cost;
8209 curr_state->originator = originator;
8210 curr_state->before_nops_num = before_nops_num;
8211 curr_state->after_nops_num = 0;
8212 curr_state->accumulated_insns_num
8213 = originator->accumulated_insns_num + before_nops_num;
8214 curr_state->branch_deviation = originator->branch_deviation;
8215 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8216 gcc_assert (insn);
8217 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8219 gcc_assert (GET_MODE (insn) != TImode);
8220 if (!try_issue_nops (curr_state, before_nops_num))
8221 return;
8222 if (!try_issue_insn (curr_state, insn))
8223 return;
8224 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8225 if (curr_state->accumulated_insns_num % 3 != 0)
8226 curr_state->middle_bundle_stops++;
8227 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8228 && curr_state->accumulated_insns_num % 3 != 0)
8230 free_bundle_state (curr_state);
8231 return;
8234 else if (GET_MODE (insn) != TImode)
8236 if (!try_issue_nops (curr_state, before_nops_num))
8237 return;
8238 if (!try_issue_insn (curr_state, insn))
8239 return;
8240 curr_state->accumulated_insns_num++;
8241 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8242 && asm_noperands (PATTERN (insn)) < 0);
8244 if (ia64_safe_type (insn) == TYPE_L)
8245 curr_state->accumulated_insns_num++;
8247 else
8249 /* If this is an insn that must be first in a group, then don't allow
8250 nops to be emitted before it. Currently, alloc is the only such
8251 supported instruction. */
8252 /* ??? The bundling automatons should handle this for us, but they do
8253 not yet have support for the first_insn attribute. */
8254 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8256 free_bundle_state (curr_state);
8257 return;
8260 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8261 state_transition (curr_state->dfa_state, NULL);
8262 curr_state->cost++;
8263 if (!try_issue_nops (curr_state, before_nops_num))
8264 return;
8265 if (!try_issue_insn (curr_state, insn))
8266 return;
8267 curr_state->accumulated_insns_num++;
8268 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8269 || asm_noperands (PATTERN (insn)) >= 0)
8271 /* Finish bundle containing asm insn. */
8272 curr_state->after_nops_num
8273 = 3 - curr_state->accumulated_insns_num % 3;
8274 curr_state->accumulated_insns_num
8275 += 3 - curr_state->accumulated_insns_num % 3;
8277 else if (ia64_safe_type (insn) == TYPE_L)
8278 curr_state->accumulated_insns_num++;
8280 if (ia64_safe_type (insn) == TYPE_B)
8281 curr_state->branch_deviation
8282 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8283 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8285 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8287 state_t dfa_state;
8288 struct bundle_state *curr_state1;
8289 struct bundle_state *allocated_states_chain;
8291 curr_state1 = get_free_bundle_state ();
8292 dfa_state = curr_state1->dfa_state;
8293 allocated_states_chain = curr_state1->allocated_states_chain;
8294 *curr_state1 = *curr_state;
8295 curr_state1->dfa_state = dfa_state;
8296 curr_state1->allocated_states_chain = allocated_states_chain;
8297 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8298 dfa_state_size);
8299 curr_state = curr_state1;
8301 if (!try_issue_nops (curr_state,
8302 3 - curr_state->accumulated_insns_num % 3))
8303 return;
8304 curr_state->after_nops_num
8305 = 3 - curr_state->accumulated_insns_num % 3;
8306 curr_state->accumulated_insns_num
8307 += 3 - curr_state->accumulated_insns_num % 3;
8309 if (!insert_bundle_state (curr_state))
8310 free_bundle_state (curr_state);
8311 return;
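/* In other words, the function above distinguishes three cases: a group
   barrier is issued after the leading nops without occupying a bundle slot;
   an insn that does not start a new cycle (no TImode) is simply issued after
   the leading nops; and an insn that starts a new cycle first advances the
   DFA to the next cycle, which adds one to the state's cost.  The resulting
   state (optionally duplicated and padded with trailing nops up to a bundle
   boundary when TRY_BUNDLE_END_P) is recorded via insert_bundle_state (),
   which keeps only the best state for each (dfa_state, insn_num) key.  */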
8314 /* The following function returns the position in the two-bundle window
8315    for the given STATE.  */
8317 static int
8318 get_max_pos (state_t state)
8320 if (cpu_unit_reservation_p (state, pos_6))
8321 return 6;
8322 else if (cpu_unit_reservation_p (state, pos_5))
8323 return 5;
8324 else if (cpu_unit_reservation_p (state, pos_4))
8325 return 4;
8326 else if (cpu_unit_reservation_p (state, pos_3))
8327 return 3;
8328 else if (cpu_unit_reservation_p (state, pos_2))
8329 return 2;
8330 else if (cpu_unit_reservation_p (state, pos_1))
8331 return 1;
8332 else
8333 return 0;
8336 /* The function returns the code of a possible template for the given
8337    position and state.  The function should be called only with two values
8338    of position: 3 or 6.  We avoid generating F NOPs by putting templates
8339    containing F insns at the end of the template search, because of an
8340    undocumented anomaly in McKinley-derived cores which can
8341    cause stalls if an F-unit insn (including a NOP) is issued within a
8342    six-cycle window after reading certain application registers (such
8343    as ar.bsp).  Furthermore, power considerations also argue against
8344    the use of F-unit instructions unless they're really needed.  */
8346 static int
8347 get_template (state_t state, int pos)
8349 switch (pos)
8351 case 3:
8352 if (cpu_unit_reservation_p (state, _0mmi_))
8353 return 1;
8354 else if (cpu_unit_reservation_p (state, _0mii_))
8355 return 0;
8356 else if (cpu_unit_reservation_p (state, _0mmb_))
8357 return 7;
8358 else if (cpu_unit_reservation_p (state, _0mib_))
8359 return 6;
8360 else if (cpu_unit_reservation_p (state, _0mbb_))
8361 return 5;
8362 else if (cpu_unit_reservation_p (state, _0bbb_))
8363 return 4;
8364 else if (cpu_unit_reservation_p (state, _0mmf_))
8365 return 3;
8366 else if (cpu_unit_reservation_p (state, _0mfi_))
8367 return 2;
8368 else if (cpu_unit_reservation_p (state, _0mfb_))
8369 return 8;
8370 else if (cpu_unit_reservation_p (state, _0mlx_))
8371 return 9;
8372 else
8373 gcc_unreachable ();
8374 case 6:
8375 if (cpu_unit_reservation_p (state, _1mmi_))
8376 return 1;
8377 else if (cpu_unit_reservation_p (state, _1mii_))
8378 return 0;
8379 else if (cpu_unit_reservation_p (state, _1mmb_))
8380 return 7;
8381 else if (cpu_unit_reservation_p (state, _1mib_))
8382 return 6;
8383 else if (cpu_unit_reservation_p (state, _1mbb_))
8384 return 5;
8385 else if (cpu_unit_reservation_p (state, _1bbb_))
8386 return 4;
8387 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8388 return 3;
8389 else if (cpu_unit_reservation_p (state, _1mfi_))
8390 return 2;
8391 else if (cpu_unit_reservation_p (state, _1mfb_))
8392 return 8;
8393 else if (cpu_unit_reservation_p (state, _1mlx_))
8394 return 9;
8395 else
8396 gcc_unreachable ();
8397 default:
8398 gcc_unreachable ();
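/* The codes returned above select the bundle template named after the cpu
   unit found reserved:
     0 .mii   1 .mmi   2 .mfi   3 .mmf   4 .bbb
     5 .mbb   6 .mib   7 .mmb   8 .mfb   9 .mlx  */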
8402 /* True when INSN is important for bundling. */
8403 static bool
8404 important_for_bundling_p (rtx insn)
8406 return (INSN_P (insn)
8407 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8408 && GET_CODE (PATTERN (insn)) != USE
8409 && GET_CODE (PATTERN (insn)) != CLOBBER);
8412 /* The following function returns the first insn important for insn
8413    bundling at or after INSN and before TAIL, or NULL_RTX if there is none.  */
8415 static rtx
8416 get_next_important_insn (rtx insn, rtx tail)
8418 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8419 if (important_for_bundling_p (insn))
8420 return insn;
8421 return NULL_RTX;
8424 /* Add a bundle selector TEMPLATE0 before INSN. */
8426 static void
8427 ia64_add_bundle_selector_before (int template0, rtx insn)
8429 rtx b = gen_bundle_selector (GEN_INT (template0));
8431 ia64_emit_insn_before (b, insn);
8432 #if NR_BUNDLES == 10
8433 if ((template0 == 4 || template0 == 5)
8434 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8436 int i;
8437 rtx note = NULL_RTX;
8439 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8440    first or second slot.  If it is and has a REG_EH_REGION note, copy the
8441    note to the following nops, as br.call sets rp to the address of the
8442    following bundle and therefore an EH region end must be on a bundle
8443    boundary.  */
8444 insn = PREV_INSN (insn);
8445 for (i = 0; i < 3; i++)
8448 insn = next_active_insn (insn);
8449 while (GET_CODE (insn) == INSN
8450 && get_attr_empty (insn) == EMPTY_YES);
8451 if (GET_CODE (insn) == CALL_INSN)
8452 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8453 else if (note)
8455 int code;
8457 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8458 || code == CODE_FOR_nop_b);
8459 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8460 note = NULL_RTX;
8461 else
8462 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8466 #endif
8469 /* The following function does insn bundling.  Bundling means
8470    inserting templates and nop insns to fit insn groups into permitted
8471    templates.  Instruction scheduling uses an NDFA (non-deterministic
8472    finite automaton) encoding information about the templates and the
8473    inserted nops.  Nondeterminism of the automaton makes it possible to
8474    follow all possible insn sequences very quickly.
8476    Unfortunately it is not possible to get information about the inserted
8477    nop insns and the used templates from the automaton states.  The
8478    automaton only says that we can issue an insn, possibly inserting
8479    some nops before it and using some template.  Therefore insn
8480    bundling in this function is implemented using a DFA
8481    (deterministic finite automaton).  We follow all possible insn
8482    sequences by inserting 0-2 nops (that is what the NDFA describes for
8483    insn scheduling) before/after each insn being bundled.  We know the
8484    start of a simulated processor cycle from insn scheduling (an insn
8485    starting a new cycle has TImode).
8487    A simple implementation of insn bundling would create an enormous
8488    number of possible insn sequences satisfying the information about new
8489    cycle ticks taken from the insn scheduling.  To make the algorithm
8490    practical we use dynamic programming.  Each decision (about
8491    inserting nops, and implicitly about previous decisions) is described
8492    by the structure bundle_state (see above).  If we generate the same
8493    bundle state (the key is the automaton state after issuing the insns and
8494    nops for it), we reuse the already generated one.  As a consequence we
8495    reject some decisions which cannot improve the solution and
8496    reduce the memory used by the algorithm.
8498    When we reach the end of the EBB (extended basic block), we choose the
8499    best sequence and then, moving back through the EBB, insert templates for
8500    the best alternative.  The templates are taken by querying the
8501    automaton state for each insn in the chosen bundle states.
8503    So the algorithm makes two (forward and backward) passes through the
8504    EBB.  */
8506 static void
8507 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8509 struct bundle_state *curr_state, *next_state, *best_state;
8510 rtx insn, next_insn;
8511 int insn_num;
8512 int i, bundle_end_p, only_bundle_end_p, asm_p;
8513 int pos = 0, max_pos, template0, template1;
8514 rtx b;
8515 rtx nop;
8516 enum attr_type type;
8518 insn_num = 0;
8519 /* Count insns in the EBB. */
8520 for (insn = NEXT_INSN (prev_head_insn);
8521 insn && insn != tail;
8522 insn = NEXT_INSN (insn))
8523 if (INSN_P (insn))
8524 insn_num++;
8525 if (insn_num == 0)
8526 return;
8527 bundling_p = 1;
8528 dfa_clean_insn_cache ();
8529 initiate_bundle_state_table ();
8530 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8531 /* First (forward) pass -- generation of bundle states. */
8532 curr_state = get_free_bundle_state ();
8533 curr_state->insn = NULL;
8534 curr_state->before_nops_num = 0;
8535 curr_state->after_nops_num = 0;
8536 curr_state->insn_num = 0;
8537 curr_state->cost = 0;
8538 curr_state->accumulated_insns_num = 0;
8539 curr_state->branch_deviation = 0;
8540 curr_state->middle_bundle_stops = 0;
8541 curr_state->next = NULL;
8542 curr_state->originator = NULL;
8543 state_reset (curr_state->dfa_state);
8544 index_to_bundle_states [0] = curr_state;
8545 insn_num = 0;
8546 /* Shift the cycle mark if it is put on an insn which could be ignored.  */
8547 for (insn = NEXT_INSN (prev_head_insn);
8548 insn != tail;
8549 insn = NEXT_INSN (insn))
8550 if (INSN_P (insn)
8551 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8552 || GET_CODE (PATTERN (insn)) == USE
8553 || GET_CODE (PATTERN (insn)) == CLOBBER)
8554 && GET_MODE (insn) == TImode)
8556 PUT_MODE (insn, VOIDmode);
8557 for (next_insn = NEXT_INSN (insn);
8558 next_insn != tail;
8559 next_insn = NEXT_INSN (next_insn))
8560 if (INSN_P (next_insn)
8561 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8562 && GET_CODE (PATTERN (next_insn)) != USE
8563 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8564 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8566 PUT_MODE (next_insn, TImode);
8567 break;
8570 /* Forward pass: generation of bundle states. */
8571 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8572 insn != NULL_RTX;
8573 insn = next_insn)
8575 gcc_assert (INSN_P (insn)
8576 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8577 && GET_CODE (PATTERN (insn)) != USE
8578 && GET_CODE (PATTERN (insn)) != CLOBBER);
8579 type = ia64_safe_type (insn);
8580 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8581 insn_num++;
8582 index_to_bundle_states [insn_num] = NULL;
8583 for (curr_state = index_to_bundle_states [insn_num - 1];
8584 curr_state != NULL;
8585 curr_state = next_state)
8587 pos = curr_state->accumulated_insns_num % 3;
8588 next_state = curr_state->next;
8589 /* We must fill up the current bundle in order to start a
8590    subsequent asm insn in a new bundle.  An asm insn is always
8591    placed in a separate bundle.  */
8592 only_bundle_end_p
8593 = (next_insn != NULL_RTX
8594 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8595 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8596 /* We may fill up the current bundle if it is the cycle end
8597 without a group barrier. */
8598 bundle_end_p
8599 = (only_bundle_end_p || next_insn == NULL_RTX
8600 || (GET_MODE (next_insn) == TImode
8601 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8602 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8603 || type == TYPE_S)
8604 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8605 only_bundle_end_p);
8606 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8607 only_bundle_end_p);
8608 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8609 only_bundle_end_p);
8611 gcc_assert (index_to_bundle_states [insn_num]);
8612 for (curr_state = index_to_bundle_states [insn_num];
8613 curr_state != NULL;
8614 curr_state = curr_state->next)
8615 if (verbose >= 2 && dump)
8617 /* This structure is taken from generated code of the
8618 pipeline hazard recognizer (see file insn-attrtab.c).
8619 Please don't forget to change the structure if a new
8620 automaton is added to .md file. */
8621 struct DFA_chip
8623 unsigned short one_automaton_state;
8624 unsigned short oneb_automaton_state;
8625 unsigned short two_automaton_state;
8626 unsigned short twob_automaton_state;
8629 fprintf
8630 (dump,
8631 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8632 curr_state->unique_num,
8633 (curr_state->originator == NULL
8634 ? -1 : curr_state->originator->unique_num),
8635 curr_state->cost,
8636 curr_state->before_nops_num, curr_state->after_nops_num,
8637 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8638 curr_state->middle_bundle_stops,
8639 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8640 INSN_UID (insn));
8644 /* We should find a solution because the 2nd insn scheduling has
8645 found one. */
8646 gcc_assert (index_to_bundle_states [insn_num]);
8647 /* Find a state corresponding to the best insn sequence. */
8648 best_state = NULL;
8649 for (curr_state = index_to_bundle_states [insn_num];
8650 curr_state != NULL;
8651 curr_state = curr_state->next)
8652 /* We only look at states whose last bundle is completely filled.
8653    First we prefer insn sequences with minimal cost, then with the
8654    fewest inserted nops, and finally with branch insns
8655    placed in the 3rd slots.  */
8656 if (curr_state->accumulated_insns_num % 3 == 0
8657 && (best_state == NULL || best_state->cost > curr_state->cost
8658 || (best_state->cost == curr_state->cost
8659 && (curr_state->accumulated_insns_num
8660 < best_state->accumulated_insns_num
8661 || (curr_state->accumulated_insns_num
8662 == best_state->accumulated_insns_num
8663 && (curr_state->branch_deviation
8664 < best_state->branch_deviation
8665 || (curr_state->branch_deviation
8666 == best_state->branch_deviation
8667 && curr_state->middle_bundle_stops
8668 < best_state->middle_bundle_stops)))))))
8669 best_state = curr_state;
8670 /* Second (backward) pass: adding nops and templates. */
8671 gcc_assert (best_state);
8672 insn_num = best_state->before_nops_num;
8673 template0 = template1 = -1;
8674 for (curr_state = best_state;
8675 curr_state->originator != NULL;
8676 curr_state = curr_state->originator)
8678 insn = curr_state->insn;
8679 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8680 || asm_noperands (PATTERN (insn)) >= 0);
8681 insn_num++;
8682 if (verbose >= 2 && dump)
8684 struct DFA_chip
8686 unsigned short one_automaton_state;
8687 unsigned short oneb_automaton_state;
8688 unsigned short two_automaton_state;
8689 unsigned short twob_automaton_state;
8692 fprintf
8693 (dump,
8694 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8695 curr_state->unique_num,
8696 (curr_state->originator == NULL
8697 ? -1 : curr_state->originator->unique_num),
8698 curr_state->cost,
8699 curr_state->before_nops_num, curr_state->after_nops_num,
8700 curr_state->accumulated_insns_num, curr_state->branch_deviation,
8701 curr_state->middle_bundle_stops,
8702 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8703 INSN_UID (insn));
8705 /* Find the position in the current bundle window.  The window can
8706    contain at most two bundles.  A two-bundle window means that
8707    the processor will make two bundle rotations.  */
8708 max_pos = get_max_pos (curr_state->dfa_state);
8709 if (max_pos == 6
8710 /* The following (negative template number) means that the
8711 processor did one bundle rotation. */
8712 || (max_pos == 3 && template0 < 0))
8714 /* We are at the end of the window -- find template(s) for
8715 its bundle(s). */
8716 pos = max_pos;
8717 if (max_pos == 3)
8718 template0 = get_template (curr_state->dfa_state, 3);
8719 else
8721 template1 = get_template (curr_state->dfa_state, 3);
8722 template0 = get_template (curr_state->dfa_state, 6);
8725 if (max_pos > 3 && template1 < 0)
8726 /* This may happen when we have a stop inside a bundle.  */
8728 gcc_assert (pos <= 3);
8729 template1 = get_template (curr_state->dfa_state, 3);
8730 pos += 3;
8732 if (!asm_p)
8733 /* Emit nops after the current insn. */
8734 for (i = 0; i < curr_state->after_nops_num; i++)
8736 nop = gen_nop ();
8737 emit_insn_after (nop, insn);
8738 pos--;
8739 gcc_assert (pos >= 0);
8740 if (pos % 3 == 0)
8742 /* We are at the start of a bundle: emit the template
8743 (it should be defined). */
8744 gcc_assert (template0 >= 0);
8745 ia64_add_bundle_selector_before (template0, nop);
8746 /* If we have a two-bundle window, we make one bundle
8747    rotation.  Otherwise template0 will be undefined
8748    (a negative value).  */
8749 template0 = template1;
8750 template1 = -1;
8753 /* Move the position backward in the window.  A group barrier has
8754    no slot.  An asm insn takes a whole bundle.  */
8755 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8756 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8757 && asm_noperands (PATTERN (insn)) < 0)
8758 pos--;
8759 /* Long insn takes 2 slots. */
8760 if (ia64_safe_type (insn) == TYPE_L)
8761 pos--;
8762 gcc_assert (pos >= 0);
8763 if (pos % 3 == 0
8764 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8765 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8766 && asm_noperands (PATTERN (insn)) < 0)
8768 /* The current insn is at the bundle start: emit the
8769 template. */
8770 gcc_assert (template0 >= 0);
8771 ia64_add_bundle_selector_before (template0, insn);
8772 b = PREV_INSN (insn);
8773 insn = b;
8774 /* See comment above in analogous place for emitting nops
8775 after the insn. */
8776 template0 = template1;
8777 template1 = -1;
8779 /* Emit nops before the current insn.  */
8780 for (i = 0; i < curr_state->before_nops_num; i++)
8782 nop = gen_nop ();
8783 ia64_emit_insn_before (nop, insn);
8784 nop = PREV_INSN (insn);
8785 insn = nop;
8786 pos--;
8787 gcc_assert (pos >= 0);
8788 if (pos % 3 == 0)
8790 /* See comment above in analogous place for emitting nops
8791 after the insn. */
8792 gcc_assert (template0 >= 0);
8793 ia64_add_bundle_selector_before (template0, insn);
8794 b = PREV_INSN (insn);
8795 insn = b;
8796 template0 = template1;
8797 template1 = -1;
8802 #ifdef ENABLE_CHECKING
8804 /* Check that middle_bundle_stops was calculated correctly.  */
8805 int num = best_state->middle_bundle_stops;
8806 bool start_bundle = true, end_bundle = false;
8808 for (insn = NEXT_INSN (prev_head_insn);
8809 insn && insn != tail;
8810 insn = NEXT_INSN (insn))
8812 if (!INSN_P (insn))
8813 continue;
8814 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8815 start_bundle = true;
8816 else
8818 rtx next_insn;
8820 for (next_insn = NEXT_INSN (insn);
8821 next_insn && next_insn != tail;
8822 next_insn = NEXT_INSN (next_insn))
8823 if (INSN_P (next_insn)
8824 && (ia64_safe_itanium_class (next_insn)
8825 != ITANIUM_CLASS_IGNORE
8826 || recog_memoized (next_insn)
8827 == CODE_FOR_bundle_selector)
8828 && GET_CODE (PATTERN (next_insn)) != USE
8829 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8830 break;
8832 end_bundle = next_insn == NULL_RTX
8833 || next_insn == tail
8834 || (INSN_P (next_insn)
8835 && recog_memoized (next_insn)
8836 == CODE_FOR_bundle_selector);
8837 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8838 && !start_bundle && !end_bundle
8839 && next_insn
8840 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8841 && asm_noperands (PATTERN (next_insn)) < 0)
8842 num--;
8844 start_bundle = false;
8848 gcc_assert (num == 0);
8850 #endif
8852 free (index_to_bundle_states);
8853 finish_bundle_state_table ();
8854 bundling_p = 0;
8855 dfa_clean_insn_cache ();
8858 /* The following function is called at the end of scheduling BB or
8859 EBB. After reload, it inserts stop bits and does insn bundling. */
8861 static void
8862 ia64_sched_finish (FILE *dump, int sched_verbose)
8864 if (sched_verbose)
8865 fprintf (dump, "// Finishing schedule.\n");
8866 if (!reload_completed)
8867 return;
8868 if (reload_completed)
8870 final_emit_insn_group_barriers (dump);
8871 bundling (dump, sched_verbose, current_sched_info->prev_head,
8872 current_sched_info->next_tail);
8873 if (sched_verbose && dump)
8874 fprintf (dump, "// finishing %d-%d\n",
8875 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8876 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8878 return;
8882 /* The following function inserts stop bits in scheduled BB or EBB. */
8884 static void
8885 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8887 rtx insn;
8888 int need_barrier_p = 0;
8889 int seen_good_insn = 0;
8891 init_insn_group_barriers ();
8893 for (insn = NEXT_INSN (current_sched_info->prev_head);
8894 insn != current_sched_info->next_tail;
8895 insn = NEXT_INSN (insn))
8897 if (GET_CODE (insn) == BARRIER)
8899 rtx last = prev_active_insn (insn);
8901 if (! last)
8902 continue;
8903 if (GET_CODE (last) == JUMP_INSN
8904 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8905 last = prev_active_insn (last);
8906 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8907 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8909 init_insn_group_barriers ();
8910 seen_good_insn = 0;
8911 need_barrier_p = 0;
8913 else if (NONDEBUG_INSN_P (insn))
8915 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8917 init_insn_group_barriers ();
8918 seen_good_insn = 0;
8919 need_barrier_p = 0;
8921 else if (need_barrier_p || group_barrier_needed (insn)
8922 || (mflag_sched_stop_bits_after_every_cycle
8923 && GET_MODE (insn) == TImode
8924 && seen_good_insn))
8926 if (TARGET_EARLY_STOP_BITS)
8928 rtx last;
8930 for (last = insn;
8931 last != current_sched_info->prev_head;
8932 last = PREV_INSN (last))
8933 if (INSN_P (last) && GET_MODE (last) == TImode
8934 && stops_p [INSN_UID (last)])
8935 break;
8936 if (last == current_sched_info->prev_head)
8937 last = insn;
8938 last = prev_active_insn (last);
8939 if (last
8940 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8941 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8942 last);
8943 init_insn_group_barriers ();
8944 for (last = NEXT_INSN (last);
8945 last != insn;
8946 last = NEXT_INSN (last))
8947 if (INSN_P (last))
8949 group_barrier_needed (last);
8950 if (recog_memoized (last) >= 0
8951 && important_for_bundling_p (last))
8952 seen_good_insn = 1;
8955 else
8957 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8958 insn);
8959 init_insn_group_barriers ();
8960 seen_good_insn = 0;
8962 group_barrier_needed (insn);
8963 if (recog_memoized (insn) >= 0
8964 && important_for_bundling_p (insn))
8965 seen_good_insn = 1;
8967 else if (recog_memoized (insn) >= 0
8968 && important_for_bundling_p (insn))
8969 seen_good_insn = 1;
8970 need_barrier_p = (GET_CODE (insn) == CALL_INSN
8971 || GET_CODE (PATTERN (insn)) == ASM_INPUT
8972 || asm_noperands (PATTERN (insn)) >= 0);
8979 /* The following function returns the number of ready insns the DFA
8980    insn scheduler examines in its first-cycle multipass lookahead.  */
8982 static int
8983 ia64_first_cycle_multipass_dfa_lookahead (void)
8985 return (reload_completed ? 6 : 4);
8988 /* The following function initializes `dfa_pre_cycle_insn', `dfa_stop_insn' and the DFA state buffers.  */
8990 static void
8991 ia64_init_dfa_pre_cycle_insn (void)
8993 if (temp_dfa_state == NULL)
8995 dfa_state_size = state_size ();
8996 temp_dfa_state = xmalloc (dfa_state_size);
8997 prev_cycle_state = xmalloc (dfa_state_size);
8999 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9000 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9001 recog_memoized (dfa_pre_cycle_insn);
9002 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9003 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9004 recog_memoized (dfa_stop_insn);
9007 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9008 used by the DFA insn scheduler. */
9010 static rtx
9011 ia64_dfa_pre_cycle_insn (void)
9013 return dfa_pre_cycle_insn;
9016 /* The following function returns TRUE if PRODUCER (of type ilog or
9017 ld) produces address for CONSUMER (of type st or stf). */
9020 ia64_st_address_bypass_p (rtx producer, rtx consumer)
9022 rtx dest, reg, mem;
9024 gcc_assert (producer && consumer);
9025 dest = ia64_single_set (producer);
9026 gcc_assert (dest);
9027 reg = SET_DEST (dest);
9028 gcc_assert (reg);
9029 if (GET_CODE (reg) == SUBREG)
9030 reg = SUBREG_REG (reg);
9031 gcc_assert (GET_CODE (reg) == REG);
9033 dest = ia64_single_set (consumer);
9034 gcc_assert (dest);
9035 mem = SET_DEST (dest);
9036 gcc_assert (mem && GET_CODE (mem) == MEM);
9037 return reg_mentioned_p (reg, mem);
9040 /* The following function returns TRUE if PRODUCER (of type ilog or
9041 ld) produces address for CONSUMER (of type ld or fld). */
9044 ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9046 rtx dest, src, reg, mem;
9048 gcc_assert (producer && consumer);
9049 dest = ia64_single_set (producer);
9050 gcc_assert (dest);
9051 reg = SET_DEST (dest);
9052 gcc_assert (reg);
9053 if (GET_CODE (reg) == SUBREG)
9054 reg = SUBREG_REG (reg);
9055 gcc_assert (GET_CODE (reg) == REG);
9057 src = ia64_single_set (consumer);
9058 gcc_assert (src);
9059 mem = SET_SRC (src);
9060 gcc_assert (mem);
9062 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9063 mem = XVECEXP (mem, 0, 0);
9064 else if (GET_CODE (mem) == IF_THEN_ELSE)
9065 /* ??? Is this bypass necessary for ld.c? */
9067 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9068 mem = XEXP (mem, 1);
9071 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9072 mem = XEXP (mem, 0);
9074 if (GET_CODE (mem) == UNSPEC)
9076 int c = XINT (mem, 1);
9078 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9079 || c == UNSPEC_LDSA);
9080 mem = XVECEXP (mem, 0, 0);
9083 /* Note that LO_SUM is used for GOT loads. */
9084 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9086 return reg_mentioned_p (reg, mem);
9089 /* The following function returns TRUE if INSN produces an address for a
9090    load/store insn.  We will place such insns into an M slot because that
9091    decreases their latency.  */
9094 ia64_produce_address_p (rtx insn)
9096 return insn->call;
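/* Note: ia64_produce_address_p () above reuses the CALL flag of the insn as
   a scratch bit; it is expected to have been set earlier by the scheduler
   dependency hooks in this file for insns whose result feeds a load/store
   address.  */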
9100 /* Emit pseudo-ops for the assembler to describe predicate relations.
9101 At present this assumes that we only consider predicate pairs to
9102 be mutex, and that the assembler can deduce proper values from
9103 straight-line code. */
9105 static void
9106 emit_predicate_relation_info (void)
9108 basic_block bb;
9110 FOR_EACH_BB_REVERSE (bb)
9112 int r;
9113 rtx head = BB_HEAD (bb);
9115 /* We only need such notes at code labels. */
9116 if (GET_CODE (head) != CODE_LABEL)
9117 continue;
9118 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9119 head = NEXT_INSN (head);
9121 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9122 grabbing the entire block of predicate registers. */
9123 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9124 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9126 rtx p = gen_rtx_REG (BImode, r);
9127 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9128 if (head == BB_END (bb))
9129 BB_END (bb) = n;
9130 head = n;
9134 /* Look for conditional calls that do not return, and protect predicate
9135 relations around them. Otherwise the assembler will assume the call
9136 returns, and complain about uses of call-clobbered predicates after
9137 the call. */
9138 FOR_EACH_BB_REVERSE (bb)
9140 rtx insn = BB_HEAD (bb);
9142 while (1)
9144 if (GET_CODE (insn) == CALL_INSN
9145 && GET_CODE (PATTERN (insn)) == COND_EXEC
9146 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9148 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9149 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9150 if (BB_HEAD (bb) == insn)
9151 BB_HEAD (bb) = b;
9152 if (BB_END (bb) == insn)
9153 BB_END (bb) = a;
9156 if (insn == BB_END (bb))
9157 break;
9158 insn = NEXT_INSN (insn);
9163 /* Perform machine dependent operations on the rtl chain INSNS. */
9165 static void
9166 ia64_reorg (void)
9168 /* We are freeing block_for_insn in the toplev to keep compatibility
9169 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9170 compute_bb_for_insn ();
9172 /* If optimizing, we'll have split before scheduling. */
9173 if (optimize == 0)
9174 split_all_insns ();
9176 if (optimize && ia64_flag_schedule_insns2
9177 && dbg_cnt (ia64_sched2))
9179 timevar_push (TV_SCHED2);
9180 ia64_final_schedule = 1;
9182 initiate_bundle_states ();
9183 ia64_nop = make_insn_raw (gen_nop ());
9184 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9185 recog_memoized (ia64_nop);
9186 clocks_length = get_max_uid () + 1;
9187 stops_p = XCNEWVEC (char, clocks_length);
9189 if (ia64_tune == PROCESSOR_ITANIUM2)
9191 pos_1 = get_cpu_unit_code ("2_1");
9192 pos_2 = get_cpu_unit_code ("2_2");
9193 pos_3 = get_cpu_unit_code ("2_3");
9194 pos_4 = get_cpu_unit_code ("2_4");
9195 pos_5 = get_cpu_unit_code ("2_5");
9196 pos_6 = get_cpu_unit_code ("2_6");
9197 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9198 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9199 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9200 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9201 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9202 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9203 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9204 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9205 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9206 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9207 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9208 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9209 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9210 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9211 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9212 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9213 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9214 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9215 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9216 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9218 else
9220 pos_1 = get_cpu_unit_code ("1_1");
9221 pos_2 = get_cpu_unit_code ("1_2");
9222 pos_3 = get_cpu_unit_code ("1_3");
9223 pos_4 = get_cpu_unit_code ("1_4");
9224 pos_5 = get_cpu_unit_code ("1_5");
9225 pos_6 = get_cpu_unit_code ("1_6");
9226 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9227 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9228 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9229 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9230 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9231 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9232 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9233 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9234 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9235 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9236 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9237 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9238 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9239 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9240 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9241 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9242 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9243 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9244 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9245 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9248 if (flag_selective_scheduling2
9249 && !maybe_skip_selective_scheduling ())
9250 run_selective_scheduling ();
9251 else
9252 schedule_ebbs ();
9254 /* Redo the alignment computation, as it might have gone wrong.  */
9255 compute_alignments ();
9257 /* We cannot reuse this one because it has been corrupted by the
9258 evil glat. */
9259 finish_bundle_states ();
9260 free (stops_p);
9261 stops_p = NULL;
9262 emit_insn_group_barriers (dump_file);
9264 ia64_final_schedule = 0;
9265 timevar_pop (TV_SCHED2);
9267 else
9268 emit_all_insn_group_barriers (dump_file);
9270 df_analyze ();
9272 /* A call must not be the last instruction in a function, so that the
9273 return address is still within the function, so that unwinding works
9274 properly. Note that IA-64 differs from dwarf2 on this point. */
9275 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9277 rtx insn;
9278 int saw_stop = 0;
9280 insn = get_last_insn ();
9281 if (! INSN_P (insn))
9282 insn = prev_active_insn (insn);
9283 if (insn)
9285 /* Skip over insns that expand to nothing. */
9286 while (GET_CODE (insn) == INSN
9287 && get_attr_empty (insn) == EMPTY_YES)
9289 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9290 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9291 saw_stop = 1;
9292 insn = prev_active_insn (insn);
9294 if (GET_CODE (insn) == CALL_INSN)
9296 if (! saw_stop)
9297 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9298 emit_insn (gen_break_f ());
9299 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9304 emit_predicate_relation_info ();
9306 if (ia64_flag_var_tracking)
9308 timevar_push (TV_VAR_TRACKING);
9309 variable_tracking_main ();
9310 timevar_pop (TV_VAR_TRACKING);
9312 df_finish_pass (false);
9315 /* Return true if REGNO is used by the epilogue. */
9318 ia64_epilogue_uses (int regno)
9320 switch (regno)
9322 case R_GR (1):
9323 /* With a call to a function in another module, we will write a new
9324 value to "gp". After returning from such a call, we need to make
9325 sure the function restores the original gp-value, even if the
9326 function itself does not use the gp anymore. */
9327 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9329 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9330 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9331 /* For functions defined with the syscall_linkage attribute, all
9332 input registers are marked as live at all function exits. This
9333 prevents the register allocator from using the input registers,
9334 which in turn makes it possible to restart a system call after
9335 an interrupt without having to save/restore the input registers.
9336 This also prevents kernel data from leaking to application code. */
9337 return lookup_attribute ("syscall_linkage",
9338 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9340 case R_BR (0):
9341 /* Conditional return patterns can't represent the use of `b0' as
9342 the return address, so we force the value live this way. */
9343 return 1;
9345 case AR_PFS_REGNUM:
9346 /* Likewise for ar.pfs, which is used by br.ret. */
9347 return 1;
9349 default:
9350 return 0;
9354 /* Return true if REGNO is used by the frame unwinder. */
9357 ia64_eh_uses (int regno)
9359 unsigned int r;
9361 if (! reload_completed)
9362 return 0;
9364 if (regno == 0)
9365 return 0;
9367 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9368 if (regno == current_frame_info.r[r]
9369 || regno == emitted_frame_related_regs[r])
9370 return 1;
9372 return 0;
9375 /* Return true if this goes in small data/bss. */
9377 /* ??? We could also support our own long data here, generating movl/add/ld8
9378    instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9379    code faster because there is one less load.  This also includes incomplete
9380    types which can't go in sdata/sbss.  */
9382 static bool
9383 ia64_in_small_data_p (const_tree exp)
9385 if (TARGET_NO_SDATA)
9386 return false;
9388 /* We want to merge strings, so we never consider them small data. */
9389 if (TREE_CODE (exp) == STRING_CST)
9390 return false;
9392 /* Functions are never small data. */
9393 if (TREE_CODE (exp) == FUNCTION_DECL)
9394 return false;
9396 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9398 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9400 if (strcmp (section, ".sdata") == 0
9401 || strncmp (section, ".sdata.", 7) == 0
9402 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9403 || strcmp (section, ".sbss") == 0
9404 || strncmp (section, ".sbss.", 6) == 0
9405 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9406 return true;
9408 else
9410 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9412 /* If this is an incomplete type with size 0, then we can't put it
9413 in sdata because it might be too big when completed. */
9414 if (size > 0 && size <= ia64_section_threshold)
9415 return true;
9418 return false;
9421 /* Output assembly directives for prologue regions. */
9423 /* The current basic block number. */
9425 static bool last_block;
9427 /* True if we need a copy_state command at the start of the next block. */
9429 static bool need_copy_state;
9431 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9432 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9433 #endif
9435 /* Emit a debugging label after a call-frame-related insn. We'd
9436 rather output the label right away, but we'd have to output it
9437 after, not before, the instruction, and the instruction has not
9438 been output yet. So we emit the label after the insn, delete it to
9439 avoid introducing basic blocks, and mark it as preserved, such that
9440 it is still output, given that it is referenced in debug info. */
9442 static const char *
9443 ia64_emit_deleted_label_after_insn (rtx insn)
9445 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9446 rtx lb = gen_label_rtx ();
9447 rtx label_insn = emit_label_after (lb, insn);
9449 LABEL_PRESERVE_P (lb) = 1;
9451 delete_insn (label_insn);
9453 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9455 return xstrdup (label);
9458 /* Define the CFA after INSN with the steady-state definition. */
9460 static void
9461 ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9463 rtx fp = frame_pointer_needed
9464 ? hard_frame_pointer_rtx
9465 : stack_pointer_rtx;
9466 const char *label = ia64_emit_deleted_label_after_insn (insn);
9468 if (!frame)
9469 return;
9471 dwarf2out_def_cfa
9472 (label, REGNO (fp),
9473 ia64_initial_elimination_offset
9474 (REGNO (arg_pointer_rtx), REGNO (fp))
9475 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9478 /* The generic dwarf2 frame debug info generator does not define a
9479 separate region for the very end of the epilogue, so refrain from
9480 doing so in the IA64-specific code as well. */
9482 #define IA64_CHANGE_CFA_IN_EPILOGUE 0
9484 /* The function emits unwind directives for the start of an epilogue. */
9486 static void
9487 process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9489 /* If this isn't the last block of the function, then we need to label the
9490 current state, and copy it back in at the start of the next block. */
9492 if (!last_block)
9494 if (unwind)
9495 fprintf (asm_out_file, "\t.label_state %d\n",
9496 ++cfun->machine->state_num);
9497 need_copy_state = true;
9500 if (unwind)
9501 fprintf (asm_out_file, "\t.restore sp\n");
9502 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9503 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9504 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9507 /* This function processes a SET pattern looking for specific patterns
9508 which result in emitting an assembly directive required for unwinding. */
9510 static int
9511 process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
9513 rtx src = SET_SRC (pat);
9514 rtx dest = SET_DEST (pat);
9515 int src_regno, dest_regno;
9517 /* Look for the ALLOC insn. */
9518 if (GET_CODE (src) == UNSPEC_VOLATILE
9519 && XINT (src, 1) == UNSPECV_ALLOC
9520 && GET_CODE (dest) == REG)
9522 dest_regno = REGNO (dest);
9524 /* If this is the final destination for ar.pfs, then this must
9525 be the alloc in the prologue. */
9526 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9528 if (unwind)
9529 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9530 ia64_dbx_register_number (dest_regno));
9532 else
9534 /* This must be an alloc before a sibcall. We must drop the
9535 old frame info. The easiest way to drop the old frame
9536 info is to ensure we had a ".restore sp" directive
9537 followed by a new prologue. If the procedure doesn't
9538 have a memory-stack frame, we'll issue a dummy ".restore
9539 sp" now. */
9540 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
9541 /* If we haven't done process_epilogue () yet, do it now.  */
9542 process_epilogue (asm_out_file, insn, unwind, frame);
9543 if (unwind)
9544 fprintf (asm_out_file, "\t.prologue\n");
9546 return 1;
9549 /* Look for SP = .... */
9550 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9552 if (GET_CODE (src) == PLUS)
9554 rtx op0 = XEXP (src, 0);
9555 rtx op1 = XEXP (src, 1);
9557 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9559 if (INTVAL (op1) < 0)
9561 gcc_assert (!frame_pointer_needed);
9562 if (unwind)
9563 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9564 -INTVAL (op1));
9565 ia64_dwarf2out_def_steady_cfa (insn, frame);
9567 else
9568 process_epilogue (asm_out_file, insn, unwind, frame);
9570 else
9572 gcc_assert (GET_CODE (src) == REG
9573 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9574 process_epilogue (asm_out_file, insn, unwind, frame);
9577 return 1;
9580 /* Register move we need to look at. */
9581 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9583 src_regno = REGNO (src);
9584 dest_regno = REGNO (dest);
9586 switch (src_regno)
9588 case BR_REG (0):
9589 /* Saving return address pointer. */
9590 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9591 if (unwind)
9592 fprintf (asm_out_file, "\t.save rp, r%d\n",
9593 ia64_dbx_register_number (dest_regno));
9594 return 1;
9596 case PR_REG (0):
9597 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9598 if (unwind)
9599 fprintf (asm_out_file, "\t.save pr, r%d\n",
9600 ia64_dbx_register_number (dest_regno));
9601 return 1;
9603 case AR_UNAT_REGNUM:
9604 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9605 if (unwind)
9606 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9607 ia64_dbx_register_number (dest_regno));
9608 return 1;
9610 case AR_LC_REGNUM:
9611 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9612 if (unwind)
9613 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9614 ia64_dbx_register_number (dest_regno));
9615 return 1;
9617 case STACK_POINTER_REGNUM:
9618 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9619 && frame_pointer_needed);
9620 if (unwind)
9621 fprintf (asm_out_file, "\t.vframe r%d\n",
9622 ia64_dbx_register_number (dest_regno));
9623 ia64_dwarf2out_def_steady_cfa (insn, frame);
9624 return 1;
9626 default:
9627 /* Everything else should indicate being stored to memory. */
9628 gcc_unreachable ();

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else
	{
	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else
	{
	  gcc_assert (base == stack_pointer_rtx);
	  saveop = ".savesp";
	}

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  gcc_assert (!current_frame_info.r[reg_save_b0]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  gcc_assert (!current_frame_info.r[reg_save_pr]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
		     1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
		     1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
		     1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		     1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}

/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
{
  bool unwind = (flag_unwind_tables
		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
  bool frame = dwarf2out_do_frame ();

  if (unwind || frame)
    {
      rtx pat;

      if (NOTE_INSN_BASIC_BLOCK_P (insn))
	{
	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      if (unwind)
		{
		  fprintf (asm_out_file, "\t.body\n");
		  fprintf (asm_out_file, "\t.copy_state %d\n",
			   cfun->machine->state_num);
		}
	      if (IA64_CHANGE_CFA_IN_EPILOGUE)
		ia64_dwarf2out_def_steady_cfa (insn, frame);
	      need_copy_state = false;
	    }
	}

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat, insn, unwind, frame);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x, insn, unwind, frame);
	      }
	    break;
	  }

	default:
	  gcc_unreachable ();
	}
    }
}
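
/* Illustrative sketch (not part of the port): for a simple non-leaf
   function that allocates a small memory stack and saves b0 and ar.pfs
   in stacked registers, the directives emitted by process_set above
   appear in the assembly output roughly as

	.prologue
	.save	ar.pfs, r34
	alloc	r34 = ar.pfs, 0, 3, 0, 0
	.fframe	16
	adds	r12 = -16, r12
	.save	rp, r33
	mov	r33 = b0
	.body
	...
	.restore sp
	...

   The register numbers and instruction schedule here are hypothetical;
   the point is that each frame-related SET handled above maps onto one
   of the .save/.fframe/.vframe/.savesp/.savepsp/.restore directives
   consumed by the IA-64 unwinder.  */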

enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ
};

void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype, decl;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type, void_list_node);
      add_builtin_function ("__builtin_infq", ftype,
			    IA64_BUILTIN_INFQ, BUILT_IN_MD,
			    NULL, NULL_TREE);

      add_builtin_function ("__builtin_huge_valq", ftype,
			    IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
			    NULL, NULL_TREE);

      ftype = build_function_type_list (float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;

      ftype = build_function_type_list (float128_type,
					float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
    }

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, void_list_node),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, void_list_node),
	       IA64_BUILTIN_FLUSHRS);

#undef def_builtin

  if (TARGET_HPUX)
    {
      if (built_in_decls [BUILT_IN_FINITE])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
	  "_Isfinite");
      if (built_in_decls [BUILT_IN_FINITEF])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
	  "_Isfinitef");
      if (built_in_decls [BUILT_IN_FINITEL])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
	  "_Isfinitef128");
    }
}

rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
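
/* Illustrative sketch (not part of the port): the two register-stack
   builtins expanded above are typically used from C as

     void *backing_store_top = __builtin_ia64_bsp ();
     __builtin_ia64_flushrs ();   // flush dirty stacked registers to memory

   which is how code such as garbage collectors and stack walkers can
   locate and inspect the IA-64 register backing store.  */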

/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
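
/* Illustrative sketch (not part of the port): for a small aggregate such as

     struct s { short a; };   // 2 bytes, smaller than UNITS_PER_WORD

   the hook above returns 'upward', so on HP-UX a by-value argument of
   this type occupies the most significant bytes of its stack slot
   instead of following the generic padding rule.  */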

/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

#ifdef DO_CRTL_NAMES
      DO_CRTL_NAMES;
#endif

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}

/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode; we need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
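
/* Illustrative sketch (not part of the port): with the table above in
   place, simple C like

     int divide (int a, int b) { return a / b; }

   becomes a call to __divsi3, and a TFmode addition (e.g. on a target
   where long double is the 128-bit quad format) becomes a call to
   _U_Qfadd rather than inline code.  */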

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Use soft-fp.  */

static void
ia64_soft_fp_init_libfuncs (void)
{
}

static bool
ia64_vms_valid_pointer_mode (enum machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go into
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}

/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

#if TARGET_ABI_OPEN_VMS
  if (decl && DECL_ATTRIBUTES (decl)
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
    flags |= SECTION_VMS_OVERLAY;
#endif

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that value should be passed in
   out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && strcmp (lang_hooks.name, "GNU C++") == 0);
}
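
/* Illustrative sketch (not part of the port): for a C++ class with a
   non-trivial destructor, e.g.

     struct S { ~S (); int v[4]; };
     S make_s (void);

   the Itanium C++ ABI rule checked above means the caller passes the
   address of the return slot in out0, so any "real" first argument of
   such a function is shifted to the next parameter register.  */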

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this_rtx, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
	  if (satisfies_constraint_I (vcall_offset_rtx))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
	{
	  if (!satisfies_constraint_J (vcall_offset_rtx))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
		       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}
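
/* Illustrative sketch (not part of the port): the vector modes accepted
   above correspond to 8-byte GCC generic vectors, e.g.

     typedef signed char v8qi __attribute__ ((vector_size (8)));  // V8QImode
     typedef short       v4hi __attribute__ ((vector_size (8)));  // V4HImode
     typedef int         v2si __attribute__ ((vector_size (8)));  // V2SImode
     typedef float       v2sf __attribute__ ((vector_size (8)));  // V2SFmode

   Larger or differently shaped vector types fall through to the
   default case, are rejected here, and get lowered generically.  */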

/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
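
/* Illustrative sketch (not part of the port): in the common direct-call
   case (no static chain, counters enabled, GNU as, no AUTO_PIC), the
   code above emits roughly this at function entry:

	.prologue 4, r40
	alloc	out0 = ar.pfs, 8, 0, 4, 0
	addl	out3 = @ltoff(.LP0), r1
	;;
	.save	rp, r42
	mov	out2 = b0
	.body
	mov	out1 = r1
	br.call.sptk.many b0 = _mcount
	;;

   i.e. ar.pfs, the return branch register, the gp and the counter label
   are handed to _mcount as outgoing arguments.  The name .LP0 stands in
   for whatever ASM_GENERATE_INTERNAL_LABEL produces for LABELNO.  */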

static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
		     VOIDmode, 3,
		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
		     ip, Pmode,
		     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "e".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
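
/* Illustrative sketch (not part of the port): with the rules above, a
   C++ declaration such as

     void f (__float80, __fpreg);

   mangles roughly as _Z1feu7__fpreg on non-HP-UX targets (XFmode uses
   "e", RFmode the vendor-extended "u7__fpreg"), while on HP-UX the
   __float80 parameter uses the extended form u9__float80 instead.  */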

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}
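
/* Illustrative sketch (not part of the port): the three hooks above make
   the front ends reject code such as

     __fpreg r;
     double  d = r;        // "invalid conversion from '__fpreg'"
     __fpreg s = r + r;    // "invalid operation on '__fpreg'"

   while still allowing unary '+', taking the address of an __fpreg
   object, and assignments between __fpreg values.  */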

/* Implement overriding of the optimization options.  */
void
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
			   int size ATTRIBUTE_UNUSED)
{
  /* Let the scheduler form additional regions.  */
  set_param_value ("max-sched-extend-regions-iters", 2);

  /* Set the default values for cache-related parameters.  */
  set_param_value ("simultaneous-prefetches", 6);
  set_param_value ("l1-cache-line-size", 32);

  set_param_value ("sched-mem-true-dep-cost", 4);
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234, put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
				  tree name ATTRIBUTE_UNUSED,
				  tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
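
/* Illustrative sketch (not part of the port): the attribute validated
   above would typically be attached as

     extern int foo (void) __attribute__ ((version_id ("1234")));

   and, per the comment before the handler, results in a

	.alias foo "foo{1234}"

   statement so that references to foo resolve to the versioned symbol
   name.  The declaration syntax shown is an assumed, representative
   use; only the directive spelling is taken from the comment above.  */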

/* Target hook for c_mode_for_suffix.  */

static enum machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
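
/* Illustrative sketch (not part of the port): the suffix mapping above
   is what lets C code spell extended-precision constants directly, e.g.

     __float128 q = 1.1q;   // 'q' suffix -> TFmode constant
     __float80  w = 1.1w;   // 'w' suffix -> XFmode constant
*/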

static enum machine_mode
ia64_promote_function_mode (const_tree type,
			    enum machine_mode mode,
			    int *punsignedp,
			    const_tree funtype,
			    int for_return)
{
  /* Special processing required for OpenVMS ...  */

  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode (type, mode, punsignedp, funtype,
					  for_return);

  /* HP OpenVMS Calling Standard dated June, 2004, that describes
     HP OpenVMS I64 Version 8.2EFT,
     chapter 4 "OpenVMS I64 Conventions"
     section 4.7 "Procedure Linkage"
     subsection 4.7.5.2, "Normal Register Parameters"

     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
     values passed in registers are zero-filled; signed integral values as
     well as unsigned 32-bit integral values are sign-extended to 64 bits.
     For all other types passed in the general registers, unused bits are
     undefined."  */

  if (!AGGREGATE_TYPE_P (type)
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    {
      if (mode == SImode)
	*punsignedp = 0;
      return DImode;
    }
  else
    return promote_mode (type, mode, punsignedp);
}
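
/* Illustrative sketch (not part of the port): under the OpenVMS rule
   implemented above, a prototype like

     long callee (unsigned int x, unsigned short y);

   has x (unsigned 32-bit) promoted to DImode with *punsignedp forced
   to 0, i.e. sign-extended to 64 bits, while y keeps its unsignedness
   and is simply widened to DImode (zero-filled), matching subsection
   4.7.5.2 quoted in the comment.  */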

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

#include "gt-ia64.h"