gcc/config/ia64/ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "cfgrtl.h"
51 #include "libfuncs.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "gimplify.h"
56 #include "intl.h"
57 #include "debug.h"
58 #include "params.h"
59 #include "dbgcnt.h"
60 #include "tm-constrs.h"
61 #include "sel-sched.h"
62 #include "reload.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "builtins.h"
67 /* This file should be included last. */
68 #include "target-def.h"
70 /* This is used for communication between ASM_OUTPUT_LABEL and
71 ASM_OUTPUT_LABELREF. */
72 int ia64_asm_output_label = 0;
74 /* Register names for ia64_expand_prologue. */
75 static const char * const ia64_reg_numbers[96] =
76 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
77 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
78 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
79 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
80 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
81 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
82 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
83 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
84 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
85 "r104","r105","r106","r107","r108","r109","r110","r111",
86 "r112","r113","r114","r115","r116","r117","r118","r119",
87 "r120","r121","r122","r123","r124","r125","r126","r127"};
89 /* ??? These strings could be shared with REGISTER_NAMES. */
90 static const char * const ia64_input_reg_names[8] =
91 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
93 /* ??? These strings could be shared with REGISTER_NAMES. */
94 static const char * const ia64_local_reg_names[80] =
95 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
96 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
97 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
98 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
99 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
100 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
101 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
102 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
103 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
104 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
106 /* ??? These strings could be shared with REGISTER_NAMES. */
107 static const char * const ia64_output_reg_names[8] =
108 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
110 /* Variables which are this size or smaller are put in the sdata/sbss
111 sections. */
113 unsigned int ia64_section_threshold;
115 /* The following variable is used by the DFA insn scheduler. The value is
116 TRUE if we do insn bundling instead of insn scheduling. */
117 int bundling_p = 0;
119 enum ia64_frame_regs
121 reg_fp,
122 reg_save_b0,
123 reg_save_pr,
124 reg_save_ar_pfs,
125 reg_save_ar_unat,
126 reg_save_ar_lc,
127 reg_save_gp,
128 number_of_ia64_frame_regs
131 /* Structure to be filled in by ia64_compute_frame_size with register
132 save masks and offsets for the current function. */
134 struct ia64_frame_info
136 HOST_WIDE_INT total_size; /* size of the stack frame, not including
137 the caller's scratch area. */
138 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
139 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
140 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
141 HARD_REG_SET mask; /* mask of saved registers. */
142 unsigned int gr_used_mask; /* mask of registers in use as gr spill
143 registers or long-term scratches. */
144 int n_spilled; /* number of spilled registers. */
145 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
146 int n_input_regs; /* number of input registers used. */
147 int n_local_regs; /* number of local registers used. */
148 int n_output_regs; /* number of output registers used. */
149 int n_rotate_regs; /* number of rotating registers used. */
151 char need_regstk; /* true if a .regstk directive needed. */
152 char initialized; /* true if the data is finalized. */
155 /* Current frame information calculated by ia64_compute_frame_size. */
156 static struct ia64_frame_info current_frame_info;
157 /* The actual registers that are emitted. */
158 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
160 static int ia64_first_cycle_multipass_dfa_lookahead (void);
161 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
162 static void ia64_init_dfa_pre_cycle_insn (void);
163 static rtx ia64_dfa_pre_cycle_insn (void);
164 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
165 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
166 static void ia64_h_i_d_extended (void);
167 static void * ia64_alloc_sched_context (void);
168 static void ia64_init_sched_context (void *, bool);
169 static void ia64_set_sched_context (void *);
170 static void ia64_clear_sched_context (void *);
171 static void ia64_free_sched_context (void *);
172 static int ia64_mode_to_int (machine_mode);
173 static void ia64_set_sched_flags (spec_info_t);
174 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
175 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
176 static bool ia64_skip_rtx_p (const_rtx);
177 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
178 static bool ia64_needs_block_p (ds_t);
179 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
180 static int ia64_spec_check_p (rtx);
181 static int ia64_spec_check_src_p (rtx);
182 static rtx gen_tls_get_addr (void);
183 static rtx gen_thread_pointer (void);
184 static int find_gr_spill (enum ia64_frame_regs, int);
185 static int next_scratch_gr_reg (void);
186 static void mark_reg_gr_used_mask (rtx, void *);
187 static void ia64_compute_frame_size (HOST_WIDE_INT);
188 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
189 static void finish_spill_pointers (void);
190 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
191 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
192 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
193 static rtx gen_movdi_x (rtx, rtx, rtx);
194 static rtx gen_fr_spill_x (rtx, rtx, rtx);
195 static rtx gen_fr_restore_x (rtx, rtx, rtx);
197 static void ia64_option_override (void);
198 static bool ia64_can_eliminate (const int, const int);
199 static machine_mode hfa_element_mode (const_tree, bool);
200 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
201 tree, int *, int);
202 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
203 tree, bool);
204 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
205 const_tree, bool, bool);
206 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static rtx ia64_function_incoming_arg (cumulative_args_t,
209 machine_mode, const_tree, bool);
210 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
213 static unsigned int ia64_function_arg_boundary (machine_mode,
214 const_tree);
215 static bool ia64_function_ok_for_sibcall (tree, tree);
216 static bool ia64_return_in_memory (const_tree, const_tree);
217 static rtx ia64_function_value (const_tree, const_tree, bool);
218 static rtx ia64_libcall_value (machine_mode, const_rtx);
219 static bool ia64_function_value_regno_p (const unsigned int);
220 static int ia64_register_move_cost (machine_mode, reg_class_t,
221 reg_class_t);
222 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
223 bool);
224 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
225 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
226 static void fix_range (const char *);
227 static struct machine_function * ia64_init_machine_status (void);
228 static void emit_insn_group_barriers (FILE *);
229 static void emit_all_insn_group_barriers (FILE *);
230 static void final_emit_insn_group_barriers (FILE *);
231 static void emit_predicate_relation_info (void);
232 static void ia64_reorg (void);
233 static bool ia64_in_small_data_p (const_tree);
234 static void process_epilogue (FILE *, rtx, bool, bool);
236 static bool ia64_assemble_integer (rtx, unsigned int, int);
237 static void ia64_output_function_prologue (FILE *);
238 static void ia64_output_function_epilogue (FILE *);
239 static void ia64_output_function_end_prologue (FILE *);
241 static void ia64_print_operand (FILE *, rtx, int);
242 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
243 static bool ia64_print_operand_punct_valid_p (unsigned char code);
245 static int ia64_issue_rate (void);
246 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
247 static void ia64_sched_init (FILE *, int, int);
248 static void ia64_sched_init_global (FILE *, int, int);
249 static void ia64_sched_finish_global (FILE *, int);
250 static void ia64_sched_finish (FILE *, int);
251 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
252 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
253 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
254 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
256 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
257 static void ia64_asm_emit_except_personality (rtx);
258 static void ia64_asm_init_sections (void);
260 static enum unwind_info_type ia64_debug_unwind_info (void);
262 static struct bundle_state *get_free_bundle_state (void);
263 static void free_bundle_state (struct bundle_state *);
264 static void initiate_bundle_states (void);
265 static void finish_bundle_states (void);
266 static int insert_bundle_state (struct bundle_state *);
267 static void initiate_bundle_state_table (void);
268 static void finish_bundle_state_table (void);
269 static int try_issue_nops (struct bundle_state *, int);
270 static int try_issue_insn (struct bundle_state *, rtx);
271 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
272 int, int);
273 static int get_max_pos (state_t);
274 static int get_template (state_t, int);
276 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
277 static bool important_for_bundling_p (rtx_insn *);
278 static bool unknown_for_bundling_p (rtx_insn *);
279 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
281 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
282 HOST_WIDE_INT, tree);
283 static void ia64_file_start (void);
284 static void ia64_globalize_decl_name (FILE *, tree);
286 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
287 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
288 static section *ia64_select_rtx_section (machine_mode, rtx,
289 unsigned HOST_WIDE_INT);
290 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
291 ATTRIBUTE_UNUSED;
292 static unsigned int ia64_section_type_flags (tree, const char *, int);
293 static void ia64_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
295 static void ia64_hpux_init_libfuncs (void)
296 ATTRIBUTE_UNUSED;
297 static void ia64_sysv4_init_libfuncs (void)
298 ATTRIBUTE_UNUSED;
299 static void ia64_vms_init_libfuncs (void)
300 ATTRIBUTE_UNUSED;
301 static void ia64_soft_fp_init_libfuncs (void)
302 ATTRIBUTE_UNUSED;
303 static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
304 ATTRIBUTE_UNUSED;
305 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
306 ATTRIBUTE_UNUSED;
308 static bool ia64_attribute_takes_identifier_p (const_tree);
309 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
310 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
311 static void ia64_encode_section_info (tree, rtx, int);
312 static rtx ia64_struct_value_rtx (tree, int);
313 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
314 static bool ia64_scalar_mode_supported_p (scalar_mode mode);
315 static bool ia64_vector_mode_supported_p (machine_mode mode);
316 static bool ia64_legitimate_constant_p (machine_mode, rtx);
317 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
318 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
319 static const char *ia64_mangle_type (const_tree);
320 static const char *ia64_invalid_conversion (const_tree, const_tree);
321 static const char *ia64_invalid_unary_op (int, const_tree);
322 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
323 static machine_mode ia64_c_mode_for_suffix (char);
324 static void ia64_trampoline_init (rtx, tree, rtx);
325 static void ia64_override_options_after_change (void);
326 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
328 static tree ia64_fold_builtin (tree, int, tree *, bool);
329 static tree ia64_builtin_decl (unsigned, bool);
331 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
332 static machine_mode ia64_get_reg_raw_mode (int regno);
333 static section * ia64_hpux_function_section (tree, enum node_frequency,
334 bool, bool);
336 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
337 const unsigned char *sel);
339 static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
340 static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
341 static bool ia64_modes_tieable_p (machine_mode, machine_mode);
342 static bool ia64_can_change_mode_class (machine_mode, machine_mode,
343 reg_class_t);
345 #define MAX_VECT_LEN 8
347 struct expand_vec_perm_d
349 rtx target, op0, op1;
350 unsigned char perm[MAX_VECT_LEN];
351 machine_mode vmode;
352 unsigned char nelt;
353 bool one_operand_p;
354 bool testing_p;
357 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
360 /* Table of valid machine attributes. */
361 static const struct attribute_spec ia64_attribute_table[] =
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
364 affects_type_identity } */
365 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
366 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
367 false },
368 #if TARGET_ABI_OPEN_VMS
369 { "common_object", 1, 1, true, false, false,
370 ia64_vms_common_object_attribute, false },
371 #endif
372 { "version_id", 1, 1, true, false, false,
373 ia64_handle_version_id_attribute, false },
374 { NULL, 0, 0, false, false, false, NULL, false }
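/* A minimal usage sketch for the attributes above (declarations and
   spellings here are illustrative, not taken from this file):

     static int counter __attribute__ ((model (small)));
     extern int get_rev (void) __attribute__ ((version_id ("20040821")));

   "model" must be attached to a variable and takes the identifier
   small (or __small__), asking for placement in the 2 MB area
   reachable by addl; "version_id" takes a single argument used to
   attach a version string to the symbol (an HP-UX convention); the
   VMS-only "common_object" takes an identifier or string naming the
   overlaid common block.  */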
377 /* Initialize the GCC target structure. */
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
381 #undef TARGET_INIT_BUILTINS
382 #define TARGET_INIT_BUILTINS ia64_init_builtins
384 #undef TARGET_FOLD_BUILTIN
385 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
387 #undef TARGET_EXPAND_BUILTIN
388 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
390 #undef TARGET_BUILTIN_DECL
391 #define TARGET_BUILTIN_DECL ia64_builtin_decl
393 #undef TARGET_ASM_BYTE_OP
394 #define TARGET_ASM_BYTE_OP "\tdata1\t"
395 #undef TARGET_ASM_ALIGNED_HI_OP
396 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
397 #undef TARGET_ASM_ALIGNED_SI_OP
398 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
399 #undef TARGET_ASM_ALIGNED_DI_OP
400 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
401 #undef TARGET_ASM_UNALIGNED_HI_OP
402 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
403 #undef TARGET_ASM_UNALIGNED_SI_OP
404 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
405 #undef TARGET_ASM_UNALIGNED_DI_OP
406 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
407 #undef TARGET_ASM_INTEGER
408 #define TARGET_ASM_INTEGER ia64_assemble_integer
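/* With the pseudo-ops defined above, the assembler output for integer
   data looks roughly like this (a sketch; the exact value formatting is
   left to the generic assemble_integer machinery):

       data4	42		// aligned 4-byte value
       data8	label		// aligned 8-byte value or address
       data8.ua	label		// same, but at an unaligned location  */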
410 #undef TARGET_OPTION_OVERRIDE
411 #define TARGET_OPTION_OVERRIDE ia64_option_override
413 #undef TARGET_ASM_FUNCTION_PROLOGUE
414 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
415 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
416 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
417 #undef TARGET_ASM_FUNCTION_EPILOGUE
418 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
420 #undef TARGET_PRINT_OPERAND
421 #define TARGET_PRINT_OPERAND ia64_print_operand
422 #undef TARGET_PRINT_OPERAND_ADDRESS
423 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
424 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
425 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
427 #undef TARGET_IN_SMALL_DATA_P
428 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
432 #undef TARGET_SCHED_ISSUE_RATE
433 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
434 #undef TARGET_SCHED_VARIABLE_ISSUE
435 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
436 #undef TARGET_SCHED_INIT
437 #define TARGET_SCHED_INIT ia64_sched_init
438 #undef TARGET_SCHED_FINISH
439 #define TARGET_SCHED_FINISH ia64_sched_finish
440 #undef TARGET_SCHED_INIT_GLOBAL
441 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
442 #undef TARGET_SCHED_FINISH_GLOBAL
443 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER ia64_sched_reorder
446 #undef TARGET_SCHED_REORDER2
447 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
449 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
450 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
452 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
453 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
455 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
456 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
457 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
458 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
460 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
461 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
462 ia64_first_cycle_multipass_dfa_lookahead_guard
464 #undef TARGET_SCHED_DFA_NEW_CYCLE
465 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
467 #undef TARGET_SCHED_H_I_D_EXTENDED
468 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
470 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
471 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
473 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
474 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
476 #undef TARGET_SCHED_SET_SCHED_CONTEXT
477 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
479 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
480 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
482 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
483 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
485 #undef TARGET_SCHED_SET_SCHED_FLAGS
486 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
488 #undef TARGET_SCHED_GET_INSN_SPEC_DS
489 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
491 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
492 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
494 #undef TARGET_SCHED_SPECULATE_INSN
495 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
497 #undef TARGET_SCHED_NEEDS_BLOCK_P
498 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
500 #undef TARGET_SCHED_GEN_SPEC_CHECK
501 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
503 #undef TARGET_SCHED_SKIP_RTX_P
504 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
506 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
507 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG ia64_function_arg
512 #undef TARGET_FUNCTION_INCOMING_ARG
513 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
514 #undef TARGET_FUNCTION_ARG_ADVANCE
515 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
516 #undef TARGET_FUNCTION_ARG_PADDING
517 #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
518 #undef TARGET_FUNCTION_ARG_BOUNDARY
519 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
521 #undef TARGET_ASM_OUTPUT_MI_THUNK
522 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
523 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
524 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
526 #undef TARGET_ASM_FILE_START
527 #define TARGET_ASM_FILE_START ia64_file_start
529 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
530 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
532 #undef TARGET_REGISTER_MOVE_COST
533 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
534 #undef TARGET_MEMORY_MOVE_COST
535 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
536 #undef TARGET_RTX_COSTS
537 #define TARGET_RTX_COSTS ia64_rtx_costs
538 #undef TARGET_ADDRESS_COST
539 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
541 #undef TARGET_UNSPEC_MAY_TRAP_P
542 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
544 #undef TARGET_MACHINE_DEPENDENT_REORG
545 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
547 #undef TARGET_ENCODE_SECTION_INFO
548 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
550 #undef TARGET_SECTION_TYPE_FLAGS
551 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
553 #ifdef HAVE_AS_TLS
554 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
555 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
556 #endif
558 /* ??? Investigate. */
559 #if 0
560 #undef TARGET_PROMOTE_PROTOTYPES
561 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
562 #endif
564 #undef TARGET_FUNCTION_VALUE
565 #define TARGET_FUNCTION_VALUE ia64_function_value
566 #undef TARGET_LIBCALL_VALUE
567 #define TARGET_LIBCALL_VALUE ia64_libcall_value
568 #undef TARGET_FUNCTION_VALUE_REGNO_P
569 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
571 #undef TARGET_STRUCT_VALUE_RTX
572 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
575 #undef TARGET_SETUP_INCOMING_VARARGS
576 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
577 #undef TARGET_STRICT_ARGUMENT_NAMING
578 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
579 #undef TARGET_MUST_PASS_IN_STACK
580 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
581 #undef TARGET_GET_RAW_RESULT_MODE
582 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
583 #undef TARGET_GET_RAW_ARG_MODE
584 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
586 #undef TARGET_MEMBER_TYPE_FORCES_BLK
587 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
589 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
590 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
592 #undef TARGET_ASM_UNWIND_EMIT
593 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
594 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
595 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
596 #undef TARGET_ASM_INIT_SECTIONS
597 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
599 #undef TARGET_DEBUG_UNWIND_INFO
600 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
602 #undef TARGET_SCALAR_MODE_SUPPORTED_P
603 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
604 #undef TARGET_VECTOR_MODE_SUPPORTED_P
605 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
609 #undef TARGET_LEGITIMATE_ADDRESS_P
610 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
612 #undef TARGET_LRA_P
613 #define TARGET_LRA_P hook_bool_void_false
615 #undef TARGET_CANNOT_FORCE_CONST_MEM
616 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
618 #undef TARGET_MANGLE_TYPE
619 #define TARGET_MANGLE_TYPE ia64_mangle_type
621 #undef TARGET_INVALID_CONVERSION
622 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
623 #undef TARGET_INVALID_UNARY_OP
624 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
625 #undef TARGET_INVALID_BINARY_OP
626 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
628 #undef TARGET_C_MODE_FOR_SUFFIX
629 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
631 #undef TARGET_CAN_ELIMINATE
632 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
634 #undef TARGET_TRAMPOLINE_INIT
635 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
637 #undef TARGET_CAN_USE_DOLOOP_P
638 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
639 #undef TARGET_INVALID_WITHIN_DOLOOP
640 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
642 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
643 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
645 #undef TARGET_PREFERRED_RELOAD_CLASS
646 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
648 #undef TARGET_DELAY_SCHED2
649 #define TARGET_DELAY_SCHED2 true
651 /* Variable tracking should be run after all optimizations which
652 change order of insns. It also needs a valid CFG. */
653 #undef TARGET_DELAY_VARTRACK
654 #define TARGET_DELAY_VARTRACK true
656 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
657 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
659 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
660 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
662 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
663 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
665 #undef TARGET_HARD_REGNO_NREGS
666 #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
667 #undef TARGET_HARD_REGNO_MODE_OK
668 #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok
670 #undef TARGET_MODES_TIEABLE_P
671 #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
673 #undef TARGET_CAN_CHANGE_MODE_CLASS
674 #define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class
676 struct gcc_target targetm = TARGET_INITIALIZER;
678 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
679 identifier as an argument, so the front end shouldn't look it up. */
681 static bool
682 ia64_attribute_takes_identifier_p (const_tree attr_id)
684 if (is_attribute_p ("model", attr_id))
685 return true;
686 #if TARGET_ABI_OPEN_VMS
687 if (is_attribute_p ("common_object", attr_id))
688 return true;
689 #endif
690 return false;
693 typedef enum
695 ADDR_AREA_NORMAL, /* normal address area */
696 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
698 ia64_addr_area;
700 static GTY(()) tree small_ident1;
701 static GTY(()) tree small_ident2;
703 static void
704 init_idents (void)
706 if (small_ident1 == 0)
708 small_ident1 = get_identifier ("small");
709 small_ident2 = get_identifier ("__small__");
713 /* Retrieve the address area that has been chosen for the given decl. */
715 static ia64_addr_area
716 ia64_get_addr_area (tree decl)
718 tree model_attr;
720 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
721 if (model_attr)
723 tree id;
725 init_idents ();
726 id = TREE_VALUE (TREE_VALUE (model_attr));
727 if (id == small_ident1 || id == small_ident2)
728 return ADDR_AREA_SMALL;
730 return ADDR_AREA_NORMAL;
733 static tree
734 ia64_handle_model_attribute (tree *node, tree name, tree args,
735 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
737 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
738 ia64_addr_area area;
739 tree arg, decl = *node;
741 init_idents ();
742 arg = TREE_VALUE (args);
743 if (arg == small_ident1 || arg == small_ident2)
745 addr_area = ADDR_AREA_SMALL;
747 else
749 warning (OPT_Wattributes, "invalid argument of %qE attribute",
750 name);
751 *no_add_attrs = true;
754 switch (TREE_CODE (decl))
756 case VAR_DECL:
757 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
758 == FUNCTION_DECL)
759 && !TREE_STATIC (decl))
761 error_at (DECL_SOURCE_LOCATION (decl),
762 "an address area attribute cannot be specified for "
763 "local variables");
764 *no_add_attrs = true;
766 area = ia64_get_addr_area (decl);
767 if (area != ADDR_AREA_NORMAL && addr_area != area)
769 error ("address area of %q+D conflicts with previous "
770 "declaration", decl);
771 *no_add_attrs = true;
773 break;
775 case FUNCTION_DECL:
776 error_at (DECL_SOURCE_LOCATION (decl),
777 "address area attribute cannot be specified for "
778 "functions");
779 *no_add_attrs = true;
780 break;
782 default:
783 warning (OPT_Wattributes, "%qE attribute ignored",
784 name);
785 *no_add_attrs = true;
786 break;
789 return NULL_TREE;
 792 /* Part of the low-level implementation of DEC Ada pragma Common_Object, which
793 enables the shared use of variables stored in overlaid linker areas
794 corresponding to the use of Fortran COMMON. */
796 static tree
797 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
798 int flags ATTRIBUTE_UNUSED,
799 bool *no_add_attrs)
801 tree decl = *node;
802 tree id;
804 gcc_assert (DECL_P (decl));
806 DECL_COMMON (decl) = 1;
807 id = TREE_VALUE (args);
808 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
810 error ("%qE attribute requires a string constant argument", name);
811 *no_add_attrs = true;
812 return NULL_TREE;
814 return NULL_TREE;
817 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
819 void
820 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
821 unsigned HOST_WIDE_INT size,
822 unsigned int align)
824 tree attr = DECL_ATTRIBUTES (decl);
826 if (attr)
827 attr = lookup_attribute ("common_object", attr);
828 if (attr)
830 tree id = TREE_VALUE (TREE_VALUE (attr));
831 const char *name;
833 if (TREE_CODE (id) == IDENTIFIER_NODE)
834 name = IDENTIFIER_POINTER (id);
835 else if (TREE_CODE (id) == STRING_CST)
836 name = TREE_STRING_POINTER (id);
837 else
838 abort ();
840 fprintf (file, "\t.vms_common\t\"%s\",", name);
842 else
843 fprintf (file, "%s", COMMON_ASM_OP);
845 /* Code from elfos.h. */
846 assemble_name (file, name);
847 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
848 size, align / BITS_PER_UNIT);
850 fputc ('\n', file);
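/* Output sketch for a 16-byte, 8-byte-aligned variable x declared with
   common_object ("WORKAREA") (both names invented for the example):

       .vms_common	"WORKAREA",x,16,8

   Without the attribute the generic COMMON_ASM_OP path is taken
   instead, e.g. ".common x,16,8".  */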
853 static void
854 ia64_encode_addr_area (tree decl, rtx symbol)
856 int flags;
858 flags = SYMBOL_REF_FLAGS (symbol);
859 switch (ia64_get_addr_area (decl))
861 case ADDR_AREA_NORMAL: break;
862 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
863 default: gcc_unreachable ();
865 SYMBOL_REF_FLAGS (symbol) = flags;
868 static void
869 ia64_encode_section_info (tree decl, rtx rtl, int first)
871 default_encode_section_info (decl, rtl, first);
873 /* Careful not to prod global register variables. */
874 if (TREE_CODE (decl) == VAR_DECL
875 && GET_CODE (DECL_RTL (decl)) == MEM
876 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
877 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
878 ia64_encode_addr_area (decl, XEXP (rtl, 0));
881 /* Return 1 if the operands of a move are ok. */
884 ia64_move_ok (rtx dst, rtx src)
886 /* If we're under init_recog_no_volatile, we'll not be able to use
887 memory_operand. So check the code directly and don't worry about
888 the validity of the underlying address, which should have been
889 checked elsewhere anyway. */
890 if (GET_CODE (dst) != MEM)
891 return 1;
892 if (GET_CODE (src) == MEM)
893 return 0;
894 if (register_operand (src, VOIDmode))
895 return 1;
 897 /* Otherwise, this must be a constant, and it must be either 0, 0.0 or 1.0. */
898 if (INTEGRAL_MODE_P (GET_MODE (dst)))
899 return src == const0_rtx;
900 else
901 return satisfies_constraint_G (src);
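/* A quick summary of what this accepts (illustrative, not exhaustive):

     (set (reg r14) (reg r15))          -> 1  (destination is not a MEM)
     (set (mem ...) (reg ...))          -> 1
     (set (mem ...) (mem ...))          -> 0  (no memory-to-memory moves)
     (set (mem:DI ...) (const_int 0))   -> 1  (integer stores: only zero)
     (set (mem:DF ...) (const_double))  -> accepted only if constraint G
                                           (0.0 or 1.0, i.e. f0/f1) holds.  */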
904 /* Return 1 if the operands are ok for a floating point load pair. */
907 ia64_load_pair_ok (rtx dst, rtx src)
909 /* ??? There is a thinko in the implementation of the "x" constraint and the
910 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
911 also return false for it. */
912 if (GET_CODE (dst) != REG
913 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
914 return 0;
915 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
916 return 0;
917 switch (GET_CODE (XEXP (src, 0)))
919 case REG:
920 case POST_INC:
921 break;
922 case POST_DEC:
923 return 0;
924 case POST_MODIFY:
926 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
928 if (GET_CODE (adjust) != CONST_INT
929 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
930 return 0;
932 break;
933 default:
934 abort ();
936 return 1;
940 addp4_optimize_ok (rtx op1, rtx op2)
 942 return (basereg_operand (op1, GET_MODE (op1)) !=
 943 basereg_operand (op2, GET_MODE (op2)));
946 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
947 Return the length of the field, or <= 0 on failure. */
950 ia64_depz_field_mask (rtx rop, rtx rshift)
952 unsigned HOST_WIDE_INT op = INTVAL (rop);
953 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
955 /* Get rid of the zero bits we're shifting in. */
956 op >>= shift;
958 /* We must now have a solid block of 1's at bit 0. */
959 return exact_log2 (op + 1);
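/* Worked example: rop = 0xff00, rshift = 8.  Shifting out the zeros
   that dep.z will shift in leaves op = 0xff, a solid block of ones at
   bit 0, and exact_log2 (0xff + 1) = 8, so the field is 8 bits wide.
   For rop = 0xf0f0, rshift = 4 the shifted value 0xf0f is not of the
   form 2^n - 1, exact_log2 returns -1, and the combination is
   rejected.  */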
962 /* Return the TLS model to use for ADDR. */
964 static enum tls_model
965 tls_symbolic_operand_type (rtx addr)
967 enum tls_model tls_kind = TLS_MODEL_NONE;
969 if (GET_CODE (addr) == CONST)
971 if (GET_CODE (XEXP (addr, 0)) == PLUS
972 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
973 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
975 else if (GET_CODE (addr) == SYMBOL_REF)
976 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
978 return tls_kind;
981 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
982 as a base register. */
984 static inline bool
985 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
987 if (strict
988 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
989 return true;
990 else if (!strict
991 && (GENERAL_REGNO_P (REGNO (reg))
992 || !HARD_REGISTER_P (reg)))
993 return true;
994 else
995 return false;
998 static bool
999 ia64_legitimate_address_reg (const_rtx reg, bool strict)
1001 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1002 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1003 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1004 return true;
1006 return false;
1009 static bool
1010 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1012 if (GET_CODE (disp) == PLUS
1013 && rtx_equal_p (reg, XEXP (disp, 0))
1014 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1015 || (CONST_INT_P (XEXP (disp, 1))
1016 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1017 return true;
1019 return false;
1022 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1024 static bool
1025 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1026 rtx x, bool strict)
1028 if (ia64_legitimate_address_reg (x, strict))
1029 return true;
1030 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1031 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1032 && XEXP (x, 0) != arg_pointer_rtx)
1033 return true;
1034 else if (GET_CODE (x) == POST_MODIFY
1035 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1036 && XEXP (x, 0) != arg_pointer_rtx
1037 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1038 return true;
1039 else
1040 return false;
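/* Summarising the forms accepted above (an informal sketch; rN and rM
   stand for arbitrary valid base/index registers, and the loads are
   only examples of how such addresses are used):

     (reg rN)                                    ld8 r14 = [rN]
     (post_inc (reg rN))                         ld8 r14 = [rN], 8
     (post_modify rN (plus rN (const_int d)))    ld8 r14 = [rN], d
                                                 with -256 <= d <= 255
     (post_modify rN (plus rN rM))               ld8 r14 = [rN], rM

   The argument pointer is excluded as the base of the auto-modify
   forms, and constant displacements must fit the signed 9-bit range
   checked in ia64_legitimate_address_disp.  */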
1043 /* Return true if X is a constant that is valid for some immediate
1044 field in an instruction. */
1046 static bool
1047 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1049 switch (GET_CODE (x))
1051 case CONST_INT:
1052 case LABEL_REF:
1053 return true;
1055 case CONST_DOUBLE:
1056 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1057 return true;
1058 return satisfies_constraint_G (x);
1060 case CONST:
1061 case SYMBOL_REF:
1062 /* ??? Short term workaround for PR 28490. We must make the code here
1063 match the code in ia64_expand_move and move_operand, even though they
1064 are both technically wrong. */
1065 if (tls_symbolic_operand_type (x) == 0)
1067 HOST_WIDE_INT addend = 0;
1068 rtx op = x;
1070 if (GET_CODE (op) == CONST
1071 && GET_CODE (XEXP (op, 0)) == PLUS
1072 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1074 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1075 op = XEXP (XEXP (op, 0), 0);
1078 if (any_offset_symbol_operand (op, mode)
1079 || function_operand (op, mode))
1080 return true;
1081 if (aligned_offset_symbol_operand (op, mode))
1082 return (addend & 0x3fff) == 0;
1083 return false;
1085 return false;
1087 case CONST_VECTOR:
1088 if (mode == V2SFmode)
1089 return satisfies_constraint_Y (x);
1091 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1092 && GET_MODE_SIZE (mode) <= 8);
1094 default:
1095 return false;
1099 /* Don't allow TLS addresses to get spilled to memory. */
1101 static bool
1102 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1104 if (mode == RFmode)
1105 return true;
1106 return tls_symbolic_operand_type (x) != 0;
1109 /* Expand a symbolic constant load. */
1111 bool
1112 ia64_expand_load_address (rtx dest, rtx src)
1114 gcc_assert (GET_CODE (dest) == REG);
 1116 /* ILP32 mode still loads 64 bits of data from the GOT. This avoids
1117 having to pointer-extend the value afterward. Other forms of address
1118 computation below are also more natural to compute as 64-bit quantities.
1119 If we've been given an SImode destination register, change it. */
1120 if (GET_MODE (dest) != Pmode)
1121 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1122 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1124 if (TARGET_NO_PIC)
1125 return false;
1126 if (small_addr_symbolic_operand (src, VOIDmode))
1127 return false;
1129 if (TARGET_AUTO_PIC)
1130 emit_insn (gen_load_gprel64 (dest, src));
1131 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1132 emit_insn (gen_load_fptr (dest, src));
1133 else if (sdata_symbolic_operand (src, VOIDmode))
1134 emit_insn (gen_load_gprel (dest, src));
1135 else if (local_symbolic_operand64 (src, VOIDmode))
1137 /* We want to use @gprel rather than @ltoff relocations for local
1138 symbols:
1139 - @gprel does not require dynamic linker
1140 - and does not use .sdata section
1141 https://gcc.gnu.org/bugzilla/60465 */
1142 emit_insn (gen_load_gprel64 (dest, src));
1144 else
1146 HOST_WIDE_INT addend = 0;
1147 rtx tmp;
1149 /* We did split constant offsets in ia64_expand_move, and we did try
1150 to keep them split in move_operand, but we also allowed reload to
1151 rematerialize arbitrary constants rather than spill the value to
1152 the stack and reload it. So we have to be prepared here to split
1153 them apart again. */
1154 if (GET_CODE (src) == CONST)
1156 HOST_WIDE_INT hi, lo;
1158 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1159 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1160 hi = hi - lo;
1162 if (lo != 0)
1164 addend = lo;
1165 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1169 tmp = gen_rtx_HIGH (Pmode, src);
1170 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1171 emit_insn (gen_rtx_SET (dest, tmp));
1173 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1174 emit_insn (gen_rtx_SET (dest, tmp));
1176 if (addend)
1178 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1179 emit_insn (gen_rtx_SET (dest, tmp));
1183 return true;
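/* Worked example of the offset split above: for src = sym + 0x12345,
   hi starts as 0x12345; sign-extending its low 14 bits with
   ((hi & 0x3fff) ^ 0x2000) - 0x2000 gives lo = -0x1cbb, and hi becomes
   0x12345 - (-0x1cbb) = 0x14000.  The HIGH/LO_SUM sequence then
   materialises sym + 0x14000 through the GOT, and the trailing add
   applies the small signed remainder -0x1cbb, which fits a 14-bit add
   immediate.  */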
1186 static GTY(()) rtx gen_tls_tga;
1187 static rtx
1188 gen_tls_get_addr (void)
1190 if (!gen_tls_tga)
1191 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1192 return gen_tls_tga;
1195 static GTY(()) rtx thread_pointer_rtx;
1196 static rtx
1197 gen_thread_pointer (void)
1199 if (!thread_pointer_rtx)
1200 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1201 return thread_pointer_rtx;
1204 static rtx
1205 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1206 rtx orig_op1, HOST_WIDE_INT addend)
1208 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1209 rtx_insn *insns;
1210 rtx orig_op0 = op0;
1211 HOST_WIDE_INT addend_lo, addend_hi;
1213 switch (tls_kind)
1215 case TLS_MODEL_GLOBAL_DYNAMIC:
1216 start_sequence ();
1218 tga_op1 = gen_reg_rtx (Pmode);
1219 emit_insn (gen_load_dtpmod (tga_op1, op1));
1221 tga_op2 = gen_reg_rtx (Pmode);
1222 emit_insn (gen_load_dtprel (tga_op2, op1));
1224 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1225 LCT_CONST, Pmode,
1226 tga_op1, Pmode, tga_op2, Pmode);
1228 insns = get_insns ();
1229 end_sequence ();
1231 if (GET_MODE (op0) != Pmode)
1232 op0 = tga_ret;
1233 emit_libcall_block (insns, op0, tga_ret, op1);
1234 break;
1236 case TLS_MODEL_LOCAL_DYNAMIC:
 1237 /* ??? This isn't the completely proper way to do local-dynamic.
1238 If the call to __tls_get_addr is used only by a single symbol,
1239 then we should (somehow) move the dtprel to the second arg
1240 to avoid the extra add. */
1241 start_sequence ();
1243 tga_op1 = gen_reg_rtx (Pmode);
1244 emit_insn (gen_load_dtpmod (tga_op1, op1));
1246 tga_op2 = const0_rtx;
1248 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1249 LCT_CONST, Pmode,
1250 tga_op1, Pmode, tga_op2, Pmode);
1252 insns = get_insns ();
1253 end_sequence ();
1255 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1256 UNSPEC_LD_BASE);
1257 tmp = gen_reg_rtx (Pmode);
1258 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1260 if (!register_operand (op0, Pmode))
1261 op0 = gen_reg_rtx (Pmode);
1262 if (TARGET_TLS64)
1264 emit_insn (gen_load_dtprel (op0, op1));
1265 emit_insn (gen_adddi3 (op0, tmp, op0));
1267 else
1268 emit_insn (gen_add_dtprel (op0, op1, tmp));
1269 break;
1271 case TLS_MODEL_INITIAL_EXEC:
1272 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1273 addend_hi = addend - addend_lo;
1275 op1 = plus_constant (Pmode, op1, addend_hi);
1276 addend = addend_lo;
1278 tmp = gen_reg_rtx (Pmode);
1279 emit_insn (gen_load_tprel (tmp, op1));
1281 if (!register_operand (op0, Pmode))
1282 op0 = gen_reg_rtx (Pmode);
1283 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1284 break;
1286 case TLS_MODEL_LOCAL_EXEC:
1287 if (!register_operand (op0, Pmode))
1288 op0 = gen_reg_rtx (Pmode);
1290 op1 = orig_op1;
1291 addend = 0;
1292 if (TARGET_TLS64)
1294 emit_insn (gen_load_tprel (op0, op1));
1295 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1297 else
1298 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1299 break;
1301 default:
1302 gcc_unreachable ();
1305 if (addend)
1306 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1307 orig_op0, 1, OPTAB_DIRECT);
1308 if (orig_op0 == op0)
1309 return NULL_RTX;
1310 if (GET_MODE (orig_op0) == Pmode)
1311 return op0;
1312 return gen_lowpart (GET_MODE (orig_op0), op0);
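/* A compact summary of the sequences generated above (r13 is the
   thread pointer, "tga" the __tls_get_addr libcall):

     global-dynamic   tga (@dtpmod(x), @dtprel(x))
     local-dynamic    tga (@dtpmod(x), 0), then add @dtprel(x)
     initial-exec     load the @tprel offset via the GOT, then add r13
     local-exec       add @tprel(x), r13
                      (TARGET_TLS64 instead loads the full @tprel and
                      then adds r13)

   Any addend that does not fit the relocation is applied with the
   final add at the end of the function.  */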
1316 ia64_expand_move (rtx op0, rtx op1)
1318 machine_mode mode = GET_MODE (op0);
1320 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1321 op1 = force_reg (mode, op1);
1323 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1325 HOST_WIDE_INT addend = 0;
1326 enum tls_model tls_kind;
1327 rtx sym = op1;
1329 if (GET_CODE (op1) == CONST
1330 && GET_CODE (XEXP (op1, 0)) == PLUS
1331 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1333 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1334 sym = XEXP (XEXP (op1, 0), 0);
1337 tls_kind = tls_symbolic_operand_type (sym);
1338 if (tls_kind)
1339 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1341 if (any_offset_symbol_operand (sym, mode))
1342 addend = 0;
1343 else if (aligned_offset_symbol_operand (sym, mode))
1345 HOST_WIDE_INT addend_lo, addend_hi;
1347 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1348 addend_hi = addend - addend_lo;
1350 if (addend_lo != 0)
1352 op1 = plus_constant (mode, sym, addend_hi);
1353 addend = addend_lo;
1355 else
1356 addend = 0;
1358 else
1359 op1 = sym;
1361 if (reload_completed)
1363 /* We really should have taken care of this offset earlier. */
1364 gcc_assert (addend == 0);
1365 if (ia64_expand_load_address (op0, op1))
1366 return NULL_RTX;
1369 if (addend)
1371 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1373 emit_insn (gen_rtx_SET (subtarget, op1));
1375 op1 = expand_simple_binop (mode, PLUS, subtarget,
1376 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1377 if (op0 == op1)
1378 return NULL_RTX;
1382 return op1;
1385 /* Split a move from OP1 to OP0 conditional on COND. */
1387 void
1388 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1390 rtx_insn *insn, *first = get_last_insn ();
1392 emit_move_insn (op0, op1);
1394 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1395 if (INSN_P (insn))
1396 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1397 PATTERN (insn));
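/* For example, with cond = (ne (reg:BI p6) (const_int 0)), a move that
   expanded to (set (reg r14) (reg r15)) is rewritten as

     (cond_exec (ne (reg:BI p6) (const_int 0))
                (set (reg r14) (reg r15)))

   which is output as the predicated "(p6) mov r14 = r15" (register
   numbers here are only illustrative).  */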
1400 /* Split a post-reload TImode or TFmode reference into two DImode
1401 components. This is made extra difficult by the fact that we do
1402 not get any scratch registers to work with, because reload cannot
1403 be prevented from giving us a scratch that overlaps the register
1404 pair involved. So instead, when addressing memory, we tweak the
1405 pointer register up and back down with POST_INCs. Or up and not
1406 back down when we can get away with it.
1408 REVERSED is true when the loads must be done in reversed order
1409 (high word first) for correctness. DEAD is true when the pointer
1410 dies with the second insn we generate and therefore the second
1411 address must not carry a postmodify.
1413 May return an insn which is to be emitted after the moves. */
1415 static rtx
1416 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1418 rtx fixup = 0;
1420 switch (GET_CODE (in))
1422 case REG:
1423 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1424 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1425 break;
1427 case CONST_INT:
1428 case CONST_DOUBLE:
1429 /* Cannot occur reversed. */
1430 gcc_assert (!reversed);
1432 if (GET_MODE (in) != TFmode)
1433 split_double (in, &out[0], &out[1]);
1434 else
1435 /* split_double does not understand how to split a TFmode
1436 quantity into a pair of DImode constants. */
1438 unsigned HOST_WIDE_INT p[2];
1439 long l[4]; /* TFmode is 128 bits */
1441 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1443 if (FLOAT_WORDS_BIG_ENDIAN)
1445 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1446 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1448 else
1450 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1451 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1453 out[0] = GEN_INT (p[0]);
1454 out[1] = GEN_INT (p[1]);
1456 break;
1458 case MEM:
1460 rtx base = XEXP (in, 0);
1461 rtx offset;
1463 switch (GET_CODE (base))
1465 case REG:
1466 if (!reversed)
1468 out[0] = adjust_automodify_address
1469 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1470 out[1] = adjust_automodify_address
1471 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1473 else
1475 /* Reversal requires a pre-increment, which can only
1476 be done as a separate insn. */
1477 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1478 out[0] = adjust_automodify_address
1479 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1480 out[1] = adjust_address (in, DImode, 0);
1482 break;
1484 case POST_INC:
1485 gcc_assert (!reversed && !dead);
1487 /* Just do the increment in two steps. */
1488 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1489 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1490 break;
1492 case POST_DEC:
1493 gcc_assert (!reversed && !dead);
1495 /* Add 8, subtract 24. */
1496 base = XEXP (base, 0);
1497 out[0] = adjust_automodify_address
1498 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1499 out[1] = adjust_automodify_address
1500 (in, DImode,
1501 gen_rtx_POST_MODIFY (Pmode, base,
1502 plus_constant (Pmode, base, -24)),
1504 break;
1506 case POST_MODIFY:
1507 gcc_assert (!reversed && !dead);
1509 /* Extract and adjust the modification. This case is
1510 trickier than the others, because we might have an
1511 index register, or we might have a combined offset that
1512 doesn't fit a signed 9-bit displacement field. We can
1513 assume the incoming expression is already legitimate. */
1514 offset = XEXP (base, 1);
1515 base = XEXP (base, 0);
1517 out[0] = adjust_automodify_address
1518 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1520 if (GET_CODE (XEXP (offset, 1)) == REG)
1522 /* Can't adjust the postmodify to match. Emit the
1523 original, then a separate addition insn. */
1524 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1525 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1527 else
1529 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1530 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1532 /* Again the postmodify cannot be made to match,
1533 but in this case it's more efficient to get rid
1534 of the postmodify entirely and fix up with an
1535 add insn. */
1536 out[1] = adjust_automodify_address (in, DImode, base, 8);
1537 fixup = gen_adddi3
1538 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1540 else
1542 /* Combined offset still fits in the displacement field.
1543 (We cannot overflow it at the high end.) */
1544 out[1] = adjust_automodify_address
1545 (in, DImode, gen_rtx_POST_MODIFY
1546 (Pmode, base, gen_rtx_PLUS
1547 (Pmode, base,
1548 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1552 break;
1554 default:
1555 gcc_unreachable ();
1557 break;
1560 default:
1561 gcc_unreachable ();
1564 return fixup;
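/* Sketch of the common MEM-with-REG-base case above: a TImode load
   from [r14] (not reversed, pointer still live) is split into

     out[0] = (mem:DI (post_inc r14))    // ld8 ..., [r14], 8
     out[1] = (mem:DI (post_dec r14))    // ld8 ..., [r14], -8

   so r14 ends up back at its original value.  When the pointer is dead
   the second access is a plain [r14] with no write-back, and the
   reversed case instead pre-increments r14 by 8 with a separate add
   before the two accesses.  */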
1567 /* Split a TImode or TFmode move instruction after reload.
1568 This is used by *movtf_internal and *movti_internal. */
1569 void
1570 ia64_split_tmode_move (rtx operands[])
1572 rtx in[2], out[2], insn;
1573 rtx fixup[2];
1574 bool dead = false;
1575 bool reversed = false;
1577 /* It is possible for reload to decide to overwrite a pointer with
1578 the value it points to. In that case we have to do the loads in
1579 the appropriate order so that the pointer is not destroyed too
1580 early. Also we must not generate a postmodify for that second
1581 load, or rws_access_regno will die. And we must not generate a
1582 postmodify for the second load if the destination register
1583 overlaps with the base register. */
1584 if (GET_CODE (operands[1]) == MEM
1585 && reg_overlap_mentioned_p (operands[0], operands[1]))
1587 rtx base = XEXP (operands[1], 0);
1588 while (GET_CODE (base) != REG)
1589 base = XEXP (base, 0);
1591 if (REGNO (base) == REGNO (operands[0]))
1592 reversed = true;
1594 if (refers_to_regno_p (REGNO (operands[0]),
1595 REGNO (operands[0])+2,
1596 base, 0))
1597 dead = true;
1599 /* Another reason to do the moves in reversed order is if the first
1600 element of the target register pair is also the second element of
1601 the source register pair. */
1602 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1603 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1604 reversed = true;
1606 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1607 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1609 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1610 if (GET_CODE (EXP) == MEM \
1611 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1612 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1613 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1614 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1616 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1617 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1618 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1620 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1621 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1622 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1624 if (fixup[0])
1625 emit_insn (fixup[0]);
1626 if (fixup[1])
1627 emit_insn (fixup[1]);
1629 #undef MAYBE_ADD_REG_INC_NOTE
1632 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1633 through memory plus an extra GR scratch register. Except that you can
1634 either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1635 from SECONDARY_RELOAD_CLASS, but not both.
1637 We got into problems in the first place by allowing a construct like
1638 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1639 This solution attempts to prevent this situation from occurring. When
1640 we see something like the above, we spill the inner register to memory. */
1642 static rtx
1643 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1645 if (GET_CODE (in) == SUBREG
1646 && GET_MODE (SUBREG_REG (in)) == TImode
1647 && GET_CODE (SUBREG_REG (in)) == REG)
1649 rtx memt = assign_stack_temp (TImode, 16);
1650 emit_move_insn (memt, SUBREG_REG (in));
1651 return adjust_address (memt, mode, 0);
1653 else if (force && GET_CODE (in) == REG)
1655 rtx memx = assign_stack_temp (mode, 16);
1656 emit_move_insn (memx, in);
1657 return memx;
1659 else
1660 return in;
1663 /* Expand the movxf or movrf pattern (MODE says which) with the given
1664 OPERANDS, returning true if the pattern should then invoke
1665 DONE. */
1667 bool
1668 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1670 rtx op0 = operands[0];
1672 if (GET_CODE (op0) == SUBREG)
1673 op0 = SUBREG_REG (op0);
1675 /* We must support XFmode loads into general registers for stdarg/vararg,
1676 unprototyped calls, and a rare case where a long double is passed as
1677 an argument after a float HFA fills the FP registers. We split them into
1678 DImode loads for convenience. We also need to support XFmode stores
1679 for the last case. This case does not happen for stdarg/vararg routines,
1680 because we do a block store to memory of unnamed arguments. */
1682 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1684 rtx out[2];
1686 /* We're hoping to transform everything that deals with XFmode
1687 quantities and GR registers early in the compiler. */
1688 gcc_assert (can_create_pseudo_p ());
1690 /* Struct to register can just use TImode instead. */
1691 if ((GET_CODE (operands[1]) == SUBREG
1692 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1693 || (GET_CODE (operands[1]) == REG
1694 && GR_REGNO_P (REGNO (operands[1]))))
1696 rtx op1 = operands[1];
1698 if (GET_CODE (op1) == SUBREG)
1699 op1 = SUBREG_REG (op1);
1700 else
1701 op1 = gen_rtx_REG (TImode, REGNO (op1));
1703 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1704 return true;
1707 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1709 /* Don't word-swap when reading in the constant. */
1710 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1711 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1712 0, mode));
1713 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1714 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1715 0, mode));
1716 return true;
1719 /* If the quantity is in a register not known to be GR, spill it. */
1720 if (register_operand (operands[1], mode))
1721 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1723 gcc_assert (GET_CODE (operands[1]) == MEM);
1725 /* Don't word-swap when reading in the value. */
1726 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1727 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1729 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1730 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1731 return true;
1734 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1736 /* We're hoping to transform everything that deals with XFmode
1737 quantities and GR registers early in the compiler. */
1738 gcc_assert (can_create_pseudo_p ());
1740 /* Op0 can't be a GR_REG here, as that case is handled above.
1741 If op0 is a register, then we spill op1, so that we now have a
1742 MEM operand. This requires creating an XFmode subreg of a TImode reg
1743 to force the spill. */
1744 if (register_operand (operands[0], mode))
1746 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1747 op1 = gen_rtx_SUBREG (mode, op1, 0);
1748 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1751 else
1753 rtx in[2];
1755 gcc_assert (GET_CODE (operands[0]) == MEM);
1757 /* Don't word-swap when writing out the value. */
1758 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1759 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1761 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1762 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1763 return true;
1767 if (!reload_in_progress && !reload_completed)
1769 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1771 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1773 rtx memt, memx, in = operands[1];
1774 if (CONSTANT_P (in))
1775 in = validize_mem (force_const_mem (mode, in));
1776 if (GET_CODE (in) == MEM)
1777 memt = adjust_address (in, TImode, 0);
1778 else
1780 memt = assign_stack_temp (TImode, 16);
1781 memx = adjust_address (memt, mode, 0);
1782 emit_move_insn (memx, in);
1784 emit_move_insn (op0, memt);
1785 return true;
1788 if (!ia64_move_ok (operands[0], operands[1]))
1789 operands[1] = force_reg (mode, operands[1]);
1792 return false;
1795 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1796 with the expression that holds the compare result (in VOIDmode). */
1798 static GTY(()) rtx cmptf_libfunc;
1800 void
1801 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1803 enum rtx_code code = GET_CODE (*expr);
1804 rtx cmp;
1806 /* If we have a BImode input, then we already have a compare result, and
1807 do not need to emit another comparison. */
1808 if (GET_MODE (*op0) == BImode)
1810 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1811 cmp = *op0;
1813 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1814 magic number as its third argument, that indicates what to do.
1815 The return value is an integer to be compared against zero. */
1816 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1818 enum qfcmp_magic {
1819 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1820 QCMP_UNORD = 2,
1821 QCMP_EQ = 4,
1822 QCMP_LT = 8,
1823 QCMP_GT = 16
1825 int magic;
1826 enum rtx_code ncode;
1827 rtx ret;
1829 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1830 switch (code)
1832 /* 1 = equal, 0 = not equal. Equality operators do
1833 not raise FP_INVALID when given a NaN operand. */
1834 case EQ: magic = QCMP_EQ; ncode = NE; break;
1835 case NE: magic = QCMP_EQ; ncode = EQ; break;
1836 /* isunordered() from C99. */
1837 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1838 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1839 /* Relational operators raise FP_INVALID when given
1840 a NaN operand. */
1841 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1842 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1843 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1844 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1845 /* Unordered relational operators do not raise FP_INVALID
1846 when given a NaN operand. */
1847 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1848 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1849 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1850 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1851 /* Not supported. */
1852 case UNEQ:
1853 case LTGT:
1854 default: gcc_unreachable ();
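/* As a concrete example of the sequence built below: for LT the
   libcall amounts to

     ret = _U_Qfcmp (*op0, *op1, QCMP_LT | QCMP_INV);
     cmp = (ret != 0);

   and the caller then tests CMP against zero with NE.  */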
1857 start_sequence ();
1859 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
1860 *op0, TFmode, *op1, TFmode,
1861 GEN_INT (magic), DImode);
1862 cmp = gen_reg_rtx (BImode);
1863 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1864 ret, const0_rtx)));
1866 rtx_insn *insns = get_insns ();
1867 end_sequence ();
1869 emit_libcall_block (insns, cmp, cmp,
1870 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1871 code = NE;
1873 else
1875 cmp = gen_reg_rtx (BImode);
1876 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1877 code = NE;
1880 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1881 *op0 = cmp;
1882 *op1 = const0_rtx;
1885 /* Generate an integral vector comparison. Return true if the condition has
1886 been reversed, and so the sense of the comparison should be inverted. */
1888 static bool
1889 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1890 rtx dest, rtx op0, rtx op1)
1892 bool negate = false;
1893 rtx x;
1895 /* Canonicalize the comparison to EQ, GT, GTU. */
1896 switch (code)
1898 case EQ:
1899 case GT:
1900 case GTU:
1901 break;
1903 case NE:
1904 case LE:
1905 case LEU:
1906 code = reverse_condition (code);
1907 negate = true;
1908 break;
1910 case GE:
1911 case GEU:
1912 code = reverse_condition (code);
1913 negate = true;
1914 /* FALLTHRU */
1916 case LT:
1917 case LTU:
1918 code = swap_condition (code);
1919 x = op0, op0 = op1, op1 = x;
1920 break;
1922 default:
1923 gcc_unreachable ();
1926 /* Unsigned parallel compare is not supported by the hardware. Play some
1927 tricks to turn this into a signed comparison against 0. */
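/* Concretely: for V2SImode, a >u b is rewritten as
   (a - 0x80000000) >s (b - 0x80000000), i.e. both operands are biased
   by INT_MIN so that a signed compare yields the unsigned ordering.
   For V8QImode and V4HImode, a >u b iff (a -us b) != 0, where -us is
   unsigned saturating subtraction, so the code below compares the
   saturated difference for equality with zero and flips NEGATE.  */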
1928 if (code == GTU)
1930 switch (mode)
1932 case E_V2SImode:
1934 rtx t1, t2, mask;
1936 /* Subtract (-(INT MAX) - 1) from both operands to make
1937 them signed. */
1938 mask = gen_int_mode (0x80000000, SImode);
1939 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1940 mask = force_reg (mode, mask);
1941 t1 = gen_reg_rtx (mode);
1942 emit_insn (gen_subv2si3 (t1, op0, mask));
1943 t2 = gen_reg_rtx (mode);
1944 emit_insn (gen_subv2si3 (t2, op1, mask));
1945 op0 = t1;
1946 op1 = t2;
1947 code = GT;
1949 break;
1951 case E_V8QImode:
1952 case E_V4HImode:
1953 /* Perform a parallel unsigned saturating subtraction. */
1954 x = gen_reg_rtx (mode);
1955 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1957 code = EQ;
1958 op0 = x;
1959 op1 = CONST0_RTX (mode);
1960 negate = !negate;
1961 break;
1963 default:
1964 gcc_unreachable ();
1968 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1969 emit_insn (gen_rtx_SET (dest, x));
1971 return negate;
1974 /* Emit an integral vector conditional move. */
1976 void
1977 ia64_expand_vecint_cmov (rtx operands[])
1979 machine_mode mode = GET_MODE (operands[0]);
1980 enum rtx_code code = GET_CODE (operands[3]);
1981 bool negate;
1982 rtx cmp, x, ot, of;
1984 cmp = gen_reg_rtx (mode);
1985 negate = ia64_expand_vecint_compare (code, mode, cmp,
1986 operands[4], operands[5]);
1988 ot = operands[1+negate];
1989 of = operands[2-negate];
1991 if (ot == CONST0_RTX (mode))
1993 if (of == CONST0_RTX (mode))
1995 emit_move_insn (operands[0], ot);
1996 return;
1999 x = gen_rtx_NOT (mode, cmp);
2000 x = gen_rtx_AND (mode, x, of);
2001 emit_insn (gen_rtx_SET (operands[0], x));
2003 else if (of == CONST0_RTX (mode))
2005 x = gen_rtx_AND (mode, cmp, ot);
2006 emit_insn (gen_rtx_SET (operands[0], x));
2008 else
2010 rtx t, f;
2012 t = gen_reg_rtx (mode);
2013 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2014 emit_insn (gen_rtx_SET (t, x));
2016 f = gen_reg_rtx (mode);
2017 x = gen_rtx_NOT (mode, cmp);
2018 x = gen_rtx_AND (mode, x, operands[2-negate]);
2019 emit_insn (gen_rtx_SET (f, x));
2021 x = gen_rtx_IOR (mode, t, f);
2022 emit_insn (gen_rtx_SET (operands[0], x));
2026 /* Emit an integral vector min or max operation. Return true if all done. */
2028 bool
2029 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2030 rtx operands[])
2032 rtx xops[6];
2034 /* These four combinations are supported directly. */
2035 if (mode == V8QImode && (code == UMIN || code == UMAX))
2036 return false;
2037 if (mode == V4HImode && (code == SMIN || code == SMAX))
2038 return false;
2040 /* This combination can be implemented with only saturating subtraction. */
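/* us_minus computes MAX (a - b, 0), so adding B back in gives
   (a -us b) + b = MAX (a, b), which is exactly the UMAX we want.  */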
2041 if (mode == V4HImode && code == UMAX)
2043 rtx x, tmp = gen_reg_rtx (mode);
2045 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2046 emit_insn (gen_rtx_SET (tmp, x));
2048 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2049 return true;
2052 /* Everything else implemented via vector comparisons. */
2053 xops[0] = operands[0];
2054 xops[4] = xops[1] = operands[1];
2055 xops[5] = xops[2] = operands[2];
2057 switch (code)
2059 case UMIN:
2060 code = LTU;
2061 break;
2062 case UMAX:
2063 code = GTU;
2064 break;
2065 case SMIN:
2066 code = LT;
2067 break;
2068 case SMAX:
2069 code = GT;
2070 break;
2071 default:
2072 gcc_unreachable ();
2074 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2076 ia64_expand_vecint_cmov (xops);
2077 return true;
2080 /* The vectors LO and HI each contain N halves of a double-wide vector.
2081 Reassemble either the first N/2 or the second N/2 elements. */
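/* For example, for V8QImode (N = 8) on a little-endian target the
   permutation constructed below is

     low half:   { 0, 8, 1, 9, 2, 10, 3, 11 }
     high half:  { 4, 12, 5, 13, 6, 14, 7, 15 }

   i.e. element I of LO is interleaved with element I of HI.  */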
2083 void
2084 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2086 machine_mode vmode = GET_MODE (lo);
2087 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2088 struct expand_vec_perm_d d;
2089 bool ok;
2091 d.target = gen_lowpart (vmode, out);
2092 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2093 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2094 d.vmode = vmode;
2095 d.nelt = nelt;
2096 d.one_operand_p = false;
2097 d.testing_p = false;
2099 high = (highp ? nelt / 2 : 0);
2100 for (i = 0; i < nelt / 2; ++i)
2102 d.perm[i * 2] = i + high;
2103 d.perm[i * 2 + 1] = i + high + nelt;
2106 ok = ia64_expand_vec_perm_const_1 (&d);
2107 gcc_assert (ok);
2110 /* Return a vector of the sign-extension of VEC. */
2112 static rtx
2113 ia64_unpack_sign (rtx vec, bool unsignedp)
2115 machine_mode mode = GET_MODE (vec);
2116 rtx zero = CONST0_RTX (mode);
2118 if (unsignedp)
2119 return zero;
2120 else
2122 rtx sign = gen_reg_rtx (mode);
2123 bool neg;
2125 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2126 gcc_assert (!neg);
2128 return sign;
2132 /* Emit an integral vector unpack operation. */
2134 void
2135 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2137 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2138 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2141 /* Emit an integral vector widening sum operation. */
2143 void
2144 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2146 machine_mode wmode;
2147 rtx l, h, t, sign;
2149 sign = ia64_unpack_sign (operands[1], unsignedp);
2151 wmode = GET_MODE (operands[0]);
2152 l = gen_reg_rtx (wmode);
2153 h = gen_reg_rtx (wmode);
2155 ia64_unpack_assemble (l, operands[1], sign, false);
2156 ia64_unpack_assemble (h, operands[1], sign, true);
2158 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2159 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2160 if (t != operands[0])
2161 emit_move_insn (operands[0], t);
2164 /* Emit the appropriate sequence for a call. */
2166 void
2167 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2168 int sibcall_p)
2170 rtx insn, b0;
2172 addr = XEXP (addr, 0);
2173 addr = convert_memory_address (DImode, addr);
2174 b0 = gen_rtx_REG (DImode, R_BR (0));
2176 /* ??? Should do this for functions known to bind local too. */
2177 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2179 if (sibcall_p)
2180 insn = gen_sibcall_nogp (addr);
2181 else if (! retval)
2182 insn = gen_call_nogp (addr, b0);
2183 else
2184 insn = gen_call_value_nogp (retval, addr, b0);
2185 insn = emit_call_insn (insn);
2187 else
2189 if (sibcall_p)
2190 insn = gen_sibcall_gp (addr);
2191 else if (! retval)
2192 insn = gen_call_gp (addr, b0);
2193 else
2194 insn = gen_call_value_gp (retval, addr, b0);
2195 insn = emit_call_insn (insn);
2197 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2200 if (sibcall_p)
2201 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2203 if (TARGET_ABI_OPEN_VMS)
2204 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2205 gen_rtx_REG (DImode, GR_REG (25)));
2208 static void
2209 reg_emitted (enum ia64_frame_regs r)
2211 if (emitted_frame_related_regs[r] == 0)
2212 emitted_frame_related_regs[r] = current_frame_info.r[r];
2213 else
2214 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2217 static int
2218 get_reg (enum ia64_frame_regs r)
2220 reg_emitted (r);
2221 return current_frame_info.r[r];
2224 static bool
2225 is_emitted (int regno)
2227 unsigned int r;
2229 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2230 if (emitted_frame_related_regs[r] == regno)
2231 return true;
2232 return false;
2235 void
2236 ia64_reload_gp (void)
2238 rtx tmp;
2240 if (current_frame_info.r[reg_save_gp])
2242 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2244 else
2246 HOST_WIDE_INT offset;
2247 rtx offset_r;
2249 offset = (current_frame_info.spill_cfa_off
2250 + current_frame_info.spill_size);
2251 if (frame_pointer_needed)
2253 tmp = hard_frame_pointer_rtx;
2254 offset = -offset;
2256 else
2258 tmp = stack_pointer_rtx;
2259 offset = current_frame_info.total_size - offset;
2262 offset_r = GEN_INT (offset);
2263 if (satisfies_constraint_I (offset_r))
2264 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2265 else
2267 emit_move_insn (pic_offset_table_rtx, offset_r);
2268 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2269 pic_offset_table_rtx, tmp));
2272 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2275 emit_move_insn (pic_offset_table_rtx, tmp);
2278 void
2279 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2280 rtx scratch_b, int noreturn_p, int sibcall_p)
2282 rtx insn;
2283 bool is_desc = false;
2285 /* If we find we're calling through a register, then we're actually
2286 calling through a descriptor, so load up the values. */
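/* An IA-64 function descriptor is a pair of 8-byte words: the entry
   point of the function followed by its gp value.  The first load
   below fetches the code address (post-incrementing ADDR to the
   second word); the second fetches the new gp.  */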
2287 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2289 rtx tmp;
2290 bool addr_dead_p;
2292 /* ??? We are currently constrained to *not* use peep2, because
2293 we can legitimately change the global lifetime of the GP
2294 (in the form of killing where previously live). This is
2295 because a call through a descriptor doesn't use the previous
2296 value of the GP, while a direct call does, and we do not
2297 commit to either form until the split here.
2299 That said, this means that we lack precise life info for
2300 whether ADDR is dead after this call. This is not terribly
2301 important, since we can fix things up essentially for free
2302 with the POST_DEC below, but it's nice to not use it when we
2303 can immediately tell it's not necessary. */
2304 addr_dead_p = ((noreturn_p || sibcall_p
2305 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2306 REGNO (addr)))
2307 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2309 /* Load the code address into scratch_b. */
2310 tmp = gen_rtx_POST_INC (Pmode, addr);
2311 tmp = gen_rtx_MEM (Pmode, tmp);
2312 emit_move_insn (scratch_r, tmp);
2313 emit_move_insn (scratch_b, scratch_r);
2315 /* Load the GP address. If ADDR is not dead here, then we must
2316 revert the change made above via the POST_INCREMENT. */
2317 if (!addr_dead_p)
2318 tmp = gen_rtx_POST_DEC (Pmode, addr);
2319 else
2320 tmp = addr;
2321 tmp = gen_rtx_MEM (Pmode, tmp);
2322 emit_move_insn (pic_offset_table_rtx, tmp);
2324 is_desc = true;
2325 addr = scratch_b;
2328 if (sibcall_p)
2329 insn = gen_sibcall_nogp (addr);
2330 else if (retval)
2331 insn = gen_call_value_nogp (retval, addr, retaddr);
2332 else
2333 insn = gen_call_nogp (addr, retaddr);
2334 emit_call_insn (insn);
2336 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2337 ia64_reload_gp ();
2340 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2342 This differs from the generic code in that we know about the zero-extending
2343 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2344 also know that ld.acq+cmpxchg.rel equals a full barrier.
2346 The loop we want to generate looks like
2348 cmp_reg = mem;
2349 label:
2350 old_reg = cmp_reg;
2351 new_reg = cmp_reg op val;
2352 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2353 if (cmp_reg != old_reg)
2354 goto label;
2356 Note that we only do the plain load from memory once. Subsequent
2357 iterations use the value loaded by the compare-and-swap pattern. */
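/* For example, __sync_fetch_and_add (&x, 1) on an int is handled by
   the fetchadd fast path below (only the small immediates +/-1, +/-4,
   +/-8 and +/-16 satisfy fetchadd_operand), whereas something like
   __sync_fetch_and_or (&x, mask) has no direct instruction and
   expands to the ld.acq / cmpxchg.rel loop sketched above.  */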
2359 void
2360 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2361 rtx old_dst, rtx new_dst, enum memmodel model)
2363 machine_mode mode = GET_MODE (mem);
2364 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2365 enum insn_code icode;
2367 /* Special case for using fetchadd. */
2368 if ((mode == SImode || mode == DImode)
2369 && (code == PLUS || code == MINUS)
2370 && fetchadd_operand (val, mode))
2372 if (code == MINUS)
2373 val = GEN_INT (-INTVAL (val));
2375 if (!old_dst)
2376 old_dst = gen_reg_rtx (mode);
2378 switch (model)
2380 case MEMMODEL_ACQ_REL:
2381 case MEMMODEL_SEQ_CST:
2382 case MEMMODEL_SYNC_SEQ_CST:
2383 emit_insn (gen_memory_barrier ());
2384 /* FALLTHRU */
2385 case MEMMODEL_RELAXED:
2386 case MEMMODEL_ACQUIRE:
2387 case MEMMODEL_SYNC_ACQUIRE:
2388 case MEMMODEL_CONSUME:
2389 if (mode == SImode)
2390 icode = CODE_FOR_fetchadd_acq_si;
2391 else
2392 icode = CODE_FOR_fetchadd_acq_di;
2393 break;
2394 case MEMMODEL_RELEASE:
2395 case MEMMODEL_SYNC_RELEASE:
2396 if (mode == SImode)
2397 icode = CODE_FOR_fetchadd_rel_si;
2398 else
2399 icode = CODE_FOR_fetchadd_rel_di;
2400 break;
2402 default:
2403 gcc_unreachable ();
2406 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2408 if (new_dst)
2410 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2411 true, OPTAB_WIDEN);
2412 if (new_reg != new_dst)
2413 emit_move_insn (new_dst, new_reg);
2415 return;
2418 /* Because of the volatile mem read, we get an ld.acq, which is the
2419 front half of the full barrier. The end half is the cmpxchg.rel.
2420 For relaxed and release memory models, we don't need this. But we
2421 also don't bother trying to prevent it either. */
2422 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2423 || MEM_VOLATILE_P (mem));
2425 old_reg = gen_reg_rtx (DImode);
2426 cmp_reg = gen_reg_rtx (DImode);
2427 label = gen_label_rtx ();
2429 if (mode != DImode)
2431 val = simplify_gen_subreg (DImode, val, mode, 0);
2432 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2434 else
2435 emit_move_insn (cmp_reg, mem);
2437 emit_label (label);
2439 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2440 emit_move_insn (old_reg, cmp_reg);
2441 emit_move_insn (ar_ccv, cmp_reg);
2443 if (old_dst)
2444 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2446 new_reg = cmp_reg;
2447 if (code == NOT)
2449 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2450 true, OPTAB_DIRECT);
2451 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2453 else
2454 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2455 true, OPTAB_DIRECT);
2457 if (mode != DImode)
2458 new_reg = gen_lowpart (mode, new_reg);
2459 if (new_dst)
2460 emit_move_insn (new_dst, new_reg);
2462 switch (model)
2464 case MEMMODEL_RELAXED:
2465 case MEMMODEL_ACQUIRE:
2466 case MEMMODEL_SYNC_ACQUIRE:
2467 case MEMMODEL_CONSUME:
2468 switch (mode)
2470 case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2471 case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2472 case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2473 case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2474 default:
2475 gcc_unreachable ();
2477 break;
2479 case MEMMODEL_RELEASE:
2480 case MEMMODEL_SYNC_RELEASE:
2481 case MEMMODEL_ACQ_REL:
2482 case MEMMODEL_SEQ_CST:
2483 case MEMMODEL_SYNC_SEQ_CST:
2484 switch (mode)
2486 case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2487 case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2488 case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2489 case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2490 default:
2491 gcc_unreachable ();
2493 break;
2495 default:
2496 gcc_unreachable ();
2499 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2501 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2504 /* Begin the assembly file. */
2506 static void
2507 ia64_file_start (void)
2509 default_file_start ();
2510 emit_safe_across_calls ();
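/* Emit the .pred.safe_across_calls directive naming the predicate
   registers that are preserved across calls.  With the usual IA-64
   conventions, where p6-p15 are call-clobbered, this comes out as
   ".pred.safe_across_calls p1-p5,p16-p63".  */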
2513 void
2514 emit_safe_across_calls (void)
2516 unsigned int rs, re;
2517 int out_state;
2519 rs = 1;
2520 out_state = 0;
2521 while (1)
2523 while (rs < 64 && call_used_regs[PR_REG (rs)])
2524 rs++;
2525 if (rs >= 64)
2526 break;
2527 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2528 continue;
2529 if (out_state == 0)
2531 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2532 out_state = 1;
2534 else
2535 fputc (',', asm_out_file);
2536 if (re == rs + 1)
2537 fprintf (asm_out_file, "p%u", rs);
2538 else
2539 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2540 rs = re + 1;
2542 if (out_state)
2543 fputc ('\n', asm_out_file);
2546 /* Globalize a declaration. */
2548 static void
2549 ia64_globalize_decl_name (FILE * stream, tree decl)
2551 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2552 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2553 if (version_attr)
2555 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2556 const char *p = TREE_STRING_POINTER (v);
2557 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2559 targetm.asm_out.globalize_label (stream, name);
2560 if (TREE_CODE (decl) == FUNCTION_DECL)
2561 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2564 /* Helper function for ia64_compute_frame_size: find an appropriate general
2565 register to spill some special register to. SPECIAL_SPILL_MASK contains
2566 bits in GR0 to GR31 that have already been allocated by this routine.
2567 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2569 static int
2570 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2572 int regno;
2574 if (emitted_frame_related_regs[r] != 0)
2576 regno = emitted_frame_related_regs[r];
2577 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2578 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2579 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2580 else if (crtl->is_leaf
2581 && regno >= GR_REG (1) && regno <= GR_REG (31))
2582 current_frame_info.gr_used_mask |= 1 << regno;
2584 return regno;
2587 /* If this is a leaf function, first try an otherwise unused
2588 call-clobbered register. */
2589 if (crtl->is_leaf)
2591 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2592 if (! df_regs_ever_live_p (regno)
2593 && call_used_regs[regno]
2594 && ! fixed_regs[regno]
2595 && ! global_regs[regno]
2596 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2597 && ! is_emitted (regno))
2599 current_frame_info.gr_used_mask |= 1 << regno;
2600 return regno;
2604 if (try_locals)
2606 regno = current_frame_info.n_local_regs;
2607 /* If there is a frame pointer, then we can't use loc79, because
2608 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2609 reg_name switching code in ia64_expand_prologue. */
2610 while (regno < (80 - frame_pointer_needed))
2611 if (! is_emitted (LOC_REG (regno++)))
2613 current_frame_info.n_local_regs = regno;
2614 return LOC_REG (regno - 1);
2618 /* Failed to find a general register to spill to. Must use stack. */
2619 return 0;
2622 /* In order to make for nice schedules, we try to allocate every temporary
2623 to a different register. We must of course stay away from call-saved,
2624 fixed, and global registers. We must also stay away from registers
2625 allocated in current_frame_info.gr_used_mask, since those include regs
2626 used all through the prologue.
2628 Any register allocated here must be used immediately. The idea is to
2629 aid scheduling, not to solve data flow problems. */
2631 static int last_scratch_gr_reg;
2633 static int
2634 next_scratch_gr_reg (void)
2636 int i, regno;
2638 for (i = 0; i < 32; ++i)
2640 regno = (last_scratch_gr_reg + i + 1) & 31;
2641 if (call_used_regs[regno]
2642 && ! fixed_regs[regno]
2643 && ! global_regs[regno]
2644 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2646 last_scratch_gr_reg = regno;
2647 return regno;
2651 /* There must be _something_ available. */
2652 gcc_unreachable ();
2655 /* Helper function for ia64_compute_frame_size, called through
2656 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2658 static void
2659 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2661 unsigned int regno = REGNO (reg);
2662 if (regno < 32)
2664 unsigned int i, n = REG_NREGS (reg);
2665 for (i = 0; i < n; ++i)
2666 current_frame_info.gr_used_mask |= 1 << (regno + i);
2671 /* Compute the frame layout for the current function and record it in
2672 current_frame_info. SIZE is the number of bytes of space needed for
2673 local variables. */
2675 static void
2676 ia64_compute_frame_size (HOST_WIDE_INT size)
2678 HOST_WIDE_INT total_size;
2679 HOST_WIDE_INT spill_size = 0;
2680 HOST_WIDE_INT extra_spill_size = 0;
2681 HOST_WIDE_INT pretend_args_size;
2682 HARD_REG_SET mask;
2683 int n_spilled = 0;
2684 int spilled_gr_p = 0;
2685 int spilled_fr_p = 0;
2686 unsigned int regno;
2687 int min_regno;
2688 int max_regno;
2689 int i;
2691 if (current_frame_info.initialized)
2692 return;
2694 memset (&current_frame_info, 0, sizeof current_frame_info);
2695 CLEAR_HARD_REG_SET (mask);
2697 /* Don't allocate scratches to the return register. */
2698 diddle_return_value (mark_reg_gr_used_mask, NULL);
2700 /* Don't allocate scratches to the EH scratch registers. */
2701 if (cfun->machine->ia64_eh_epilogue_sp)
2702 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2703 if (cfun->machine->ia64_eh_epilogue_bsp)
2704 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2706 /* Static stack checking uses r2 and r3. */
2707 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2708 current_frame_info.gr_used_mask |= 0xc;
2710 /* Find the size of the register stack frame. We have only 80 local
2711 registers, because we reserve 8 for the inputs and 8 for the
2712 outputs. */
2714 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2715 since we'll be adjusting that down later. */
2716 regno = LOC_REG (78) + ! frame_pointer_needed;
2717 for (; regno >= LOC_REG (0); regno--)
2718 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2719 break;
2720 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2722 /* For functions marked with the syscall_linkage attribute, we must mark
2723 all eight input registers as in use, so that locals aren't visible to
2724 the caller. */
2726 if (cfun->machine->n_varargs > 0
2727 || lookup_attribute ("syscall_linkage",
2728 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2729 current_frame_info.n_input_regs = 8;
2730 else
2732 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2733 if (df_regs_ever_live_p (regno))
2734 break;
2735 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2738 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2739 if (df_regs_ever_live_p (regno))
2740 break;
2741 i = regno - OUT_REG (0) + 1;
2743 #ifndef PROFILE_HOOK
2744 /* When -p profiling, we need one output register for the mcount argument.
2745 Likewise for -a profiling for the bb_init_func argument. For -ax
2746 profiling, we need two output registers for the two bb_init_trace_func
2747 arguments. */
2748 if (crtl->profile)
2749 i = MAX (i, 1);
2750 #endif
2751 current_frame_info.n_output_regs = i;
2753 /* ??? No rotating register support yet. */
2754 current_frame_info.n_rotate_regs = 0;
2756 /* Discover which registers need spilling, and how much room that
2757 will take. Begin with floating point and general registers,
2758 which will always wind up on the stack. */
2760 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2761 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2763 SET_HARD_REG_BIT (mask, regno);
2764 spill_size += 16;
2765 n_spilled += 1;
2766 spilled_fr_p = 1;
2769 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2770 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2772 SET_HARD_REG_BIT (mask, regno);
2773 spill_size += 8;
2774 n_spilled += 1;
2775 spilled_gr_p = 1;
2778 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2779 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2781 SET_HARD_REG_BIT (mask, regno);
2782 spill_size += 8;
2783 n_spilled += 1;
2786 /* Now come all special registers that might get saved in other
2787 general registers. */
2789 if (frame_pointer_needed)
2791 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2792 /* If we did not get a register, then we take LOC79. This is guaranteed
2793 to be free, even if regs_ever_live is already set, because this is
2794 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2795 as we don't count loc79 above. */
2796 if (current_frame_info.r[reg_fp] == 0)
2798 current_frame_info.r[reg_fp] = LOC_REG (79);
2799 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2803 if (! crtl->is_leaf)
2805 /* Emit a save of BR0 if we call other functions. Do this even
2806 if this function doesn't return, as EH depends on this to be
2807 able to unwind the stack. */
2808 SET_HARD_REG_BIT (mask, BR_REG (0));
2810 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2811 if (current_frame_info.r[reg_save_b0] == 0)
2813 extra_spill_size += 8;
2814 n_spilled += 1;
2817 /* Similarly for ar.pfs. */
2818 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2819 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2820 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2822 extra_spill_size += 8;
2823 n_spilled += 1;
2826 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2827 registers are clobbered, so we fall back to the stack. */
2828 current_frame_info.r[reg_save_gp]
2829 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2830 if (current_frame_info.r[reg_save_gp] == 0)
2832 SET_HARD_REG_BIT (mask, GR_REG (1));
2833 spill_size += 8;
2834 n_spilled += 1;
2837 else
2839 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2841 SET_HARD_REG_BIT (mask, BR_REG (0));
2842 extra_spill_size += 8;
2843 n_spilled += 1;
2846 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2848 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2849 current_frame_info.r[reg_save_ar_pfs]
2850 = find_gr_spill (reg_save_ar_pfs, 1);
2851 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2853 extra_spill_size += 8;
2854 n_spilled += 1;
2859 /* Unwind descriptor hackery: things are most efficient if we allocate
2860 consecutive GR save registers for RP, PFS, FP in that order. However,
2861 it is absolutely critical that FP get the only hard register that's
2862 guaranteed to be free, so we allocated it first. If all three did
2863 happen to be allocated hard regs, and are consecutive, rearrange them
2864 into the preferred order now.
2866 If we have already emitted code for any of those registers,
2867 then it's already too late to change. */
2868 min_regno = MIN (current_frame_info.r[reg_fp],
2869 MIN (current_frame_info.r[reg_save_b0],
2870 current_frame_info.r[reg_save_ar_pfs]));
2871 max_regno = MAX (current_frame_info.r[reg_fp],
2872 MAX (current_frame_info.r[reg_save_b0],
2873 current_frame_info.r[reg_save_ar_pfs]));
2874 if (min_regno > 0
2875 && min_regno + 2 == max_regno
2876 && (current_frame_info.r[reg_fp] == min_regno + 1
2877 || current_frame_info.r[reg_save_b0] == min_regno + 1
2878 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2879 && (emitted_frame_related_regs[reg_save_b0] == 0
2880 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2881 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2882 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2883 && (emitted_frame_related_regs[reg_fp] == 0
2884 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2886 current_frame_info.r[reg_save_b0] = min_regno;
2887 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2888 current_frame_info.r[reg_fp] = min_regno + 2;
2891 /* See if we need to store the predicate register block. */
2892 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2893 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2894 break;
2895 if (regno <= PR_REG (63))
2897 SET_HARD_REG_BIT (mask, PR_REG (0));
2898 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2899 if (current_frame_info.r[reg_save_pr] == 0)
2901 extra_spill_size += 8;
2902 n_spilled += 1;
2905 /* ??? Mark them all as used so that register renaming and such
2906 are free to use them. */
2907 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2908 df_set_regs_ever_live (regno, true);
2911 /* If we're forced to use st8.spill, we're forced to save and restore
2912 ar.unat as well. The check for existing liveness allows inline asm
2913 to touch ar.unat. */
2914 if (spilled_gr_p || cfun->machine->n_varargs
2915 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2917 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2918 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2919 current_frame_info.r[reg_save_ar_unat]
2920 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2921 if (current_frame_info.r[reg_save_ar_unat] == 0)
2923 extra_spill_size += 8;
2924 n_spilled += 1;
2928 if (df_regs_ever_live_p (AR_LC_REGNUM))
2930 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2931 current_frame_info.r[reg_save_ar_lc]
2932 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2933 if (current_frame_info.r[reg_save_ar_lc] == 0)
2935 extra_spill_size += 8;
2936 n_spilled += 1;
2940 /* If we have an odd number of words of pretend arguments written to
2941 the stack, then the FR save area will be unaligned. We round the
2942 size of this area up to keep things 16 byte aligned. */
2943 if (spilled_fr_p)
2944 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2945 else
2946 pretend_args_size = crtl->args.pretend_args_size;
2948 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2949 + crtl->outgoing_args_size);
2950 total_size = IA64_STACK_ALIGN (total_size);
2952 /* We always use the 16-byte scratch area provided by the caller, but
2953 if we are a leaf function, there's no one to which we need to provide
2954 a scratch area. However, if the function allocates dynamic stack space,
2955 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2956 so we need to cope. */
2957 if (crtl->is_leaf && !cfun->calls_alloca)
2958 total_size = MAX (0, total_size - 16);
2960 current_frame_info.total_size = total_size;
2961 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2962 current_frame_info.spill_size = spill_size;
2963 current_frame_info.extra_spill_size = extra_spill_size;
2964 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2965 current_frame_info.n_spilled = n_spilled;
2966 current_frame_info.initialized = reload_completed;
2969 /* Worker function for TARGET_CAN_ELIMINATE. */
2971 bool
2972 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2974 return (to == BR_REG (0) ? crtl->is_leaf : true);
2977 /* Compute the initial difference between the specified pair of registers. */
2979 HOST_WIDE_INT
2980 ia64_initial_elimination_offset (int from, int to)
2982 HOST_WIDE_INT offset;
2984 ia64_compute_frame_size (get_frame_size ());
2985 switch (from)
2987 case FRAME_POINTER_REGNUM:
2988 switch (to)
2990 case HARD_FRAME_POINTER_REGNUM:
2991 offset = -current_frame_info.total_size;
2992 if (!crtl->is_leaf || cfun->calls_alloca)
2993 offset += 16 + crtl->outgoing_args_size;
2994 break;
2996 case STACK_POINTER_REGNUM:
2997 offset = 0;
2998 if (!crtl->is_leaf || cfun->calls_alloca)
2999 offset += 16 + crtl->outgoing_args_size;
3000 break;
3002 default:
3003 gcc_unreachable ();
3005 break;
3007 case ARG_POINTER_REGNUM:
3008 /* Arguments start above the 16 byte save area, unless stdarg,
3009 in which case we store through the 16 byte save area. */
3010 switch (to)
3012 case HARD_FRAME_POINTER_REGNUM:
3013 offset = 16 - crtl->args.pretend_args_size;
3014 break;
3016 case STACK_POINTER_REGNUM:
3017 offset = (current_frame_info.total_size
3018 + 16 - crtl->args.pretend_args_size);
3019 break;
3021 default:
3022 gcc_unreachable ();
3024 break;
3026 default:
3027 gcc_unreachable ();
3030 return offset;
3033 /* If there are more than a trivial number of register spills, we use
3034 two interleaved iterators so that we can get two memory references
3035 per insn group.
3037 In order to simplify things in the prologue and epilogue expanders,
3038 we use helper functions to fix up the memory references after the
3039 fact with the appropriate offsets to a POST_MODIFY memory mode.
3040 The following data structure tracks the state of the two iterators
3041 while insns are being emitted. */
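/* For instance, with more than two registers to save, two iterator
   registers are set up and successive do_spill/do_restore calls
   alternate between them, so adjacent spill stores use independent
   address registers and can issue in the same instruction group.  */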
3043 struct spill_fill_data
3045 rtx_insn *init_after; /* point at which to emit initializations */
3046 rtx init_reg[2]; /* initial base register */
3047 rtx iter_reg[2]; /* the iterator registers */
3048 rtx *prev_addr[2]; /* address of last memory use */
3049 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3050 HOST_WIDE_INT prev_off[2]; /* last offset */
3051 int n_iter; /* number of iterators in use */
3052 int next_iter; /* next iterator to use */
3053 unsigned int save_gr_used_mask;
3056 static struct spill_fill_data spill_fill_data;
3058 static void
3059 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3061 int i;
3063 spill_fill_data.init_after = get_last_insn ();
3064 spill_fill_data.init_reg[0] = init_reg;
3065 spill_fill_data.init_reg[1] = init_reg;
3066 spill_fill_data.prev_addr[0] = NULL;
3067 spill_fill_data.prev_addr[1] = NULL;
3068 spill_fill_data.prev_insn[0] = NULL;
3069 spill_fill_data.prev_insn[1] = NULL;
3070 spill_fill_data.prev_off[0] = cfa_off;
3071 spill_fill_data.prev_off[1] = cfa_off;
3072 spill_fill_data.next_iter = 0;
3073 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3075 spill_fill_data.n_iter = 1 + (n_spills > 2);
3076 for (i = 0; i < spill_fill_data.n_iter; ++i)
3078 int regno = next_scratch_gr_reg ();
3079 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3080 current_frame_info.gr_used_mask |= 1 << regno;
3084 static void
3085 finish_spill_pointers (void)
3087 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3090 static rtx
3091 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3093 int iter = spill_fill_data.next_iter;
3094 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3095 rtx disp_rtx = GEN_INT (disp);
3096 rtx mem;
3098 if (spill_fill_data.prev_addr[iter])
3100 if (satisfies_constraint_N (disp_rtx))
3102 *spill_fill_data.prev_addr[iter]
3103 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3104 gen_rtx_PLUS (DImode,
3105 spill_fill_data.iter_reg[iter],
3106 disp_rtx));
3107 add_reg_note (spill_fill_data.prev_insn[iter],
3108 REG_INC, spill_fill_data.iter_reg[iter]);
3110 else
3112 /* ??? Could use register post_modify for loads. */
3113 if (!satisfies_constraint_I (disp_rtx))
3115 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3116 emit_move_insn (tmp, disp_rtx);
3117 disp_rtx = tmp;
3119 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3120 spill_fill_data.iter_reg[iter], disp_rtx));
3123 /* Micro-optimization: if we've created a frame pointer, it's at
3124 CFA 0, which may allow the real iterator to be initialized lower,
3125 slightly increasing parallelism. Also, if there are few saves
3126 it may eliminate the iterator entirely. */
3127 else if (disp == 0
3128 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3129 && frame_pointer_needed)
3131 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3132 set_mem_alias_set (mem, get_varargs_alias_set ());
3133 return mem;
3135 else
3137 rtx seq;
3138 rtx_insn *insn;
3140 if (disp == 0)
3141 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3142 spill_fill_data.init_reg[iter]);
3143 else
3145 start_sequence ();
3147 if (!satisfies_constraint_I (disp_rtx))
3149 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3150 emit_move_insn (tmp, disp_rtx);
3151 disp_rtx = tmp;
3154 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3155 spill_fill_data.init_reg[iter],
3156 disp_rtx));
3158 seq = get_insns ();
3159 end_sequence ();
3162 /* Careful for being the first insn in a sequence. */
3163 if (spill_fill_data.init_after)
3164 insn = emit_insn_after (seq, spill_fill_data.init_after);
3165 else
3167 rtx_insn *first = get_insns ();
3168 if (first)
3169 insn = emit_insn_before (seq, first);
3170 else
3171 insn = emit_insn (seq);
3173 spill_fill_data.init_after = insn;
3176 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3178 /* ??? Not all of the spills are for varargs, but some of them are.
3179 The rest of the spills belong in an alias set of their own. But
3180 it doesn't actually hurt to include them here. */
3181 set_mem_alias_set (mem, get_varargs_alias_set ());
3183 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3184 spill_fill_data.prev_off[iter] = cfa_off;
3186 if (++iter >= spill_fill_data.n_iter)
3187 iter = 0;
3188 spill_fill_data.next_iter = iter;
3190 return mem;
3193 static void
3194 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3195 rtx frame_reg)
3197 int iter = spill_fill_data.next_iter;
3198 rtx mem;
3199 rtx_insn *insn;
3201 mem = spill_restore_mem (reg, cfa_off);
3202 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3203 spill_fill_data.prev_insn[iter] = insn;
3205 if (frame_reg)
3207 rtx base;
3208 HOST_WIDE_INT off;
3210 RTX_FRAME_RELATED_P (insn) = 1;
3212 /* Don't even pretend that the unwind code can intuit its way
3213 through a pair of interleaved post_modify iterators. Just
3214 provide the correct answer. */
3216 if (frame_pointer_needed)
3218 base = hard_frame_pointer_rtx;
3219 off = - cfa_off;
3221 else
3223 base = stack_pointer_rtx;
3224 off = current_frame_info.total_size - cfa_off;
3227 add_reg_note (insn, REG_CFA_OFFSET,
3228 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3229 plus_constant (Pmode,
3230 base, off)),
3231 frame_reg));
3235 static void
3236 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3238 int iter = spill_fill_data.next_iter;
3239 rtx_insn *insn;
3241 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3242 GEN_INT (cfa_off)));
3243 spill_fill_data.prev_insn[iter] = insn;
3246 /* Wrapper functions that discard the CONST_INT spill offset. These
3247 exist so that we can give gr_spill/gr_fill the offset they need and
3248 use a consistent function interface. */
3250 static rtx
3251 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3253 return gen_movdi (dest, src);
3256 static rtx
3257 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3259 return gen_fr_spill (dest, src);
3262 static rtx
3263 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3265 return gen_fr_restore (dest, src);
3268 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3270 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3271 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
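/* For example, with all 96 stacked registers in use,
   BACKING_STORE_SIZE (96) = (96 + 96/63 + 1) * 8 = 98 * 8 = 784 bytes:
   8 bytes per register plus room for the NaT collection words that the
   RSE interleaves into the backing store (at most one per 63 registers,
   plus one more for a partially filled group).  */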
3273 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3274 inclusive. These are offsets from the current stack pointer. BS_SIZE
3275 is the size of the backing store. ??? This clobbers r2 and r3. */
3277 static void
3278 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3279 int bs_size)
3281 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3282 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3283 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3285 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3286 of the Register Stack Engine. We also need to probe it after checking
3287 that the 2 stacks don't overlap. */
3288 emit_insn (gen_bsp_value (r3));
3289 emit_move_insn (r2, GEN_INT (-(first + size)));
3291 /* Compare current value of BSP and SP registers. */
3292 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3293 r3, stack_pointer_rtx)));
3295 /* Compute the address of the probe for the Backing Store (which grows
3296 towards higher addresses). We probe only at the first offset of
3297 the next page because some OSes (e.g. Linux/ia64) only extend the
3298 backing store when this specific address is hit (but generate a SEGV
3299 on other addresses). Page size is the worst case (4KB). The reserve
3300 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3301 Also compute the address of the last probe for the memory stack
3302 (which grows towards lower addresses). */
3303 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3304 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3306 /* Compare them and raise SEGV if the former has topped the latter. */
3307 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3308 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3309 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3310 r3, r2))));
3311 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3312 const0_rtx),
3313 const0_rtx));
3314 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3315 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3316 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3317 GEN_INT (11))));
3319 /* Probe the Backing Store if necessary. */
3320 if (bs_size > 0)
3321 emit_stack_probe (r3);
3323 /* Probe the memory stack if necessary. */
3324 if (size == 0)
3327 /* See if we have a constant small number of probes to generate. If so,
3328 that's the easy case. */
3329 else if (size <= PROBE_INTERVAL)
3330 emit_stack_probe (r2);
3332 /* The run-time loop is made up of 9 insns in the generic case, while this
3333 compile-time loop is made up of 5+2*(n-2) insns for n intervals. */
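/* With the usual 4 KB probe interval, the largest size accepted here
   is n = 4 intervals, costing 5 + 2*(4-2) = 9 insns, the same as the
   run-time loop; hence the 4 * PROBE_INTERVAL cutoff.  */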
3334 else if (size <= 4 * PROBE_INTERVAL)
3336 HOST_WIDE_INT i;
3338 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3339 emit_insn (gen_rtx_SET (r2,
3340 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3341 emit_stack_probe (r2);
3343 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3344 it exceeds SIZE. If only two probes are needed, this will not
3345 generate any code. Then probe at FIRST + SIZE. */
3346 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3348 emit_insn (gen_rtx_SET (r2,
3349 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3350 emit_stack_probe (r2);
3353 emit_insn (gen_rtx_SET (r2,
3354 plus_constant (Pmode, r2,
3355 (i - PROBE_INTERVAL) - size)));
3356 emit_stack_probe (r2);
3359 /* Otherwise, do the same as above, but in a loop. Note that we must be
3360 extra careful with variables wrapping around because we might be at
3361 the very top (or the very bottom) of the address space and we have
3362 to be able to handle this case properly; in particular, we use an
3363 equality test for the loop condition. */
3364 else
3366 HOST_WIDE_INT rounded_size;
3368 emit_move_insn (r2, GEN_INT (-first));
3371 /* Step 1: round SIZE to the previous multiple of the interval. */
3373 rounded_size = size & -PROBE_INTERVAL;
3376 /* Step 2: compute initial and final value of the loop counter. */
3378 /* TEST_ADDR = SP + FIRST. */
3379 emit_insn (gen_rtx_SET (r2,
3380 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3382 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3383 if (rounded_size > (1 << 21))
3385 emit_move_insn (r3, GEN_INT (-rounded_size));
3386 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3388 else
3389 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3390 GEN_INT (-rounded_size))));
3393 /* Step 3: the loop
3397 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3398 probe at TEST_ADDR
3400 while (TEST_ADDR != LAST_ADDR)
3402 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3403 until it is equal to ROUNDED_SIZE. */
3405 emit_insn (gen_probe_stack_range (r2, r2, r3));
3408 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3409 that SIZE is equal to ROUNDED_SIZE. */
3411 /* TEMP = SIZE - ROUNDED_SIZE. */
3412 if (size != rounded_size)
3414 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3415 rounded_size - size)));
3416 emit_stack_probe (r2);
3420 /* Make sure nothing is scheduled before we are done. */
3421 emit_insn (gen_blockage ());
3424 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3425 absolute addresses. */
3427 const char *
3428 output_probe_stack_range (rtx reg1, rtx reg2)
3430 static int labelno = 0;
3431 char loop_lab[32];
3432 rtx xops[3];
3434 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3436 /* Loop. */
3437 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3439 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3440 xops[0] = reg1;
3441 xops[1] = GEN_INT (-PROBE_INTERVAL);
3442 output_asm_insn ("addl %0 = %1, %0", xops);
3443 fputs ("\t;;\n", asm_out_file);
3445 /* Probe at TEST_ADDR. */
3446 output_asm_insn ("probe.w.fault %0, 0", xops);
3448 /* Test if TEST_ADDR == LAST_ADDR. */
3449 xops[1] = reg2;
3450 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3451 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3453 /* Branch. */
3454 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3455 assemble_name_raw (asm_out_file, loop_lab);
3456 fputc ('\n', asm_out_file);
3458 return "";
3461 /* Called after register allocation to add any instructions needed for the
3462 prologue. Using a prologue insn is favored compared to putting all of the
3463 instructions in output_function_prologue(), since it allows the scheduler
3464 to intermix instructions with the saves of the caller saved registers. In
3465 some cases, it might be necessary to emit a barrier instruction as the last
3466 insn to prevent such scheduling.
3468 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3469 so that the debug info generation code can handle them properly.
3471 The register save area is laid out like so:
3472 cfa+16
3473 [ varargs spill area ]
3474 [ fr register spill area ]
3475 [ br register spill area ]
3476 [ ar register spill area ]
3477 [ pr register spill area ]
3478 [ gr register spill area ] */
3480 /* ??? Get inefficient code when the frame size is larger than can fit in an
3481 adds instruction. */
3483 void
3484 ia64_expand_prologue (void)
3486 rtx_insn *insn;
3487 rtx ar_pfs_save_reg, ar_unat_save_reg;
3488 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3489 rtx reg, alt_reg;
3491 ia64_compute_frame_size (get_frame_size ());
3492 last_scratch_gr_reg = 15;
3494 if (flag_stack_usage_info)
3495 current_function_static_stack_size = current_frame_info.total_size;
3497 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3499 HOST_WIDE_INT size = current_frame_info.total_size;
3500 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3501 + current_frame_info.n_local_regs);
3503 if (crtl->is_leaf && !cfun->calls_alloca)
3505 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3506 ia64_emit_probe_stack_range (get_stack_check_protect (),
3507 size - get_stack_check_protect (),
3508 bs_size);
3509 else if (size + bs_size > get_stack_check_protect ())
3510 ia64_emit_probe_stack_range (get_stack_check_protect (),
3511 0, bs_size);
3513 else if (size + bs_size > 0)
3514 ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
3517 if (dump_file)
3519 fprintf (dump_file, "ia64 frame related registers "
3520 "recorded in current_frame_info.r[]:\n");
3521 #define PRINTREG(a) if (current_frame_info.r[a]) \
3522 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3523 PRINTREG(reg_fp);
3524 PRINTREG(reg_save_b0);
3525 PRINTREG(reg_save_pr);
3526 PRINTREG(reg_save_ar_pfs);
3527 PRINTREG(reg_save_ar_unat);
3528 PRINTREG(reg_save_ar_lc);
3529 PRINTREG(reg_save_gp);
3530 #undef PRINTREG
3533 /* If there is no epilogue, then we don't need some prologue insns.
3534 We need to avoid emitting the dead prologue insns, because flow
3535 will complain about them. */
3536 if (optimize)
3538 edge e;
3539 edge_iterator ei;
3541 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3542 if ((e->flags & EDGE_FAKE) == 0
3543 && (e->flags & EDGE_FALLTHRU) != 0)
3544 break;
3545 epilogue_p = (e != NULL);
3547 else
3548 epilogue_p = 1;
3550 /* Set the local, input, and output register names. We need to do this
3551 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3552 half. If we use in/loc/out register names, then we get assembler errors
3553 in crtn.S because there is no alloc insn or regstk directive in there. */
3554 if (! TARGET_REG_NAMES)
3556 int inputs = current_frame_info.n_input_regs;
3557 int locals = current_frame_info.n_local_regs;
3558 int outputs = current_frame_info.n_output_regs;
3560 for (i = 0; i < inputs; i++)
3561 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3562 for (i = 0; i < locals; i++)
3563 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3564 for (i = 0; i < outputs; i++)
3565 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3568 /* Set the frame pointer register name. The regnum is logically loc79,
3569 but of course we'll not have allocated that many locals. Rather than
3570 worrying about renumbering the existing rtxs, we adjust the name. */
3571 /* ??? This code means that we can never use one local register when
3572 there is a frame pointer. loc79 gets wasted in this case, as it is
3573 renamed to a register that will never be used. See also the try_locals
3574 code in find_gr_spill. */
3575 if (current_frame_info.r[reg_fp])
3577 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3578 reg_names[HARD_FRAME_POINTER_REGNUM]
3579 = reg_names[current_frame_info.r[reg_fp]];
3580 reg_names[current_frame_info.r[reg_fp]] = tmp;
3583 /* We don't need an alloc instruction if we've used no outputs or locals. */
3584 if (current_frame_info.n_local_regs == 0
3585 && current_frame_info.n_output_regs == 0
3586 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3587 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3589 /* If there is no alloc, but there are input registers used, then we
3590 need a .regstk directive. */
3591 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3592 ar_pfs_save_reg = NULL_RTX;
3594 else
3596 current_frame_info.need_regstk = 0;
3598 if (current_frame_info.r[reg_save_ar_pfs])
3600 regno = current_frame_info.r[reg_save_ar_pfs];
3601 reg_emitted (reg_save_ar_pfs);
3603 else
3604 regno = next_scratch_gr_reg ();
3605 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3607 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3608 GEN_INT (current_frame_info.n_input_regs),
3609 GEN_INT (current_frame_info.n_local_regs),
3610 GEN_INT (current_frame_info.n_output_regs),
3611 GEN_INT (current_frame_info.n_rotate_regs)));
3612 if (current_frame_info.r[reg_save_ar_pfs])
3614 RTX_FRAME_RELATED_P (insn) = 1;
3615 add_reg_note (insn, REG_CFA_REGISTER,
3616 gen_rtx_SET (ar_pfs_save_reg,
3617 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3621 /* Set up frame pointer, stack pointer, and spill iterators. */
3623 n_varargs = cfun->machine->n_varargs;
3624 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3625 stack_pointer_rtx, 0);
3627 if (frame_pointer_needed)
3629 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3630 RTX_FRAME_RELATED_P (insn) = 1;
3632 /* Force the unwind info to recognize this as defining a new CFA,
3633 rather than some temp register setup. */
3634 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3637 if (current_frame_info.total_size != 0)
3639 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3640 rtx offset;
3642 if (satisfies_constraint_I (frame_size_rtx))
3643 offset = frame_size_rtx;
3644 else
3646 regno = next_scratch_gr_reg ();
3647 offset = gen_rtx_REG (DImode, regno);
3648 emit_move_insn (offset, frame_size_rtx);
3651 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3652 stack_pointer_rtx, offset));
3654 if (! frame_pointer_needed)
3656 RTX_FRAME_RELATED_P (insn) = 1;
3657 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3658 gen_rtx_SET (stack_pointer_rtx,
3659 gen_rtx_PLUS (DImode,
3660 stack_pointer_rtx,
3661 frame_size_rtx)));
3664 /* ??? At this point we must generate a magic insn that appears to
3665 modify the stack pointer, the frame pointer, and all spill
3666 iterators. This would allow the most scheduling freedom. For
3667 now, just hard stop. */
3668 emit_insn (gen_blockage ());
3671 /* Must copy out ar.unat before doing any integer spills. */
3672 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3674 if (current_frame_info.r[reg_save_ar_unat])
3676 ar_unat_save_reg
3677 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3678 reg_emitted (reg_save_ar_unat);
3680 else
3682 alt_regno = next_scratch_gr_reg ();
3683 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3684 current_frame_info.gr_used_mask |= 1 << alt_regno;
3687 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3688 insn = emit_move_insn (ar_unat_save_reg, reg);
3689 if (current_frame_info.r[reg_save_ar_unat])
3691 RTX_FRAME_RELATED_P (insn) = 1;
3692 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3695 /* Even if we're not going to generate an epilogue, we still
3696 need to save the register so that EH works. */
3697 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3698 emit_insn (gen_prologue_use (ar_unat_save_reg));
3700 else
3701 ar_unat_save_reg = NULL_RTX;
3703 /* Spill all varargs registers. Do this before spilling any GR registers,
3704 since we want the UNAT bits for the GR registers to override the UNAT
3705 bits from varargs, which we don't care about. */
3707 cfa_off = -16;
3708 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3710 reg = gen_rtx_REG (DImode, regno);
3711 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
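/* For illustration: with n_varargs == 2 this loop spills the last two
   incoming argument registers (in7, then in6) at CFA offsets -8 and 0
   respectively, since cfa_off is pre-incremented from -16.  */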
3714 /* Locate the bottom of the register save area. */
3715 cfa_off = (current_frame_info.spill_cfa_off
3716 + current_frame_info.spill_size
3717 + current_frame_info.extra_spill_size);
3719 /* Save the predicate register block either in a register or in memory. */
3720 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3722 reg = gen_rtx_REG (DImode, PR_REG (0));
3723 if (current_frame_info.r[reg_save_pr] != 0)
3725 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3726 reg_emitted (reg_save_pr);
3727 insn = emit_move_insn (alt_reg, reg);
3729 /* ??? Denote pr spill/fill by a DImode move that modifies all
3730 64 hard registers. */
3731 RTX_FRAME_RELATED_P (insn) = 1;
3732 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3734 /* Even if we're not going to generate an epilogue, we still
3735 need to save the register so that EH works. */
3736 if (! epilogue_p)
3737 emit_insn (gen_prologue_use (alt_reg));
3739 else
3741 alt_regno = next_scratch_gr_reg ();
3742 alt_reg = gen_rtx_REG (DImode, alt_regno);
3743 insn = emit_move_insn (alt_reg, reg);
3744 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3745 cfa_off -= 8;
3749 /* Handle AR regs in numerical order. All of them get special handling. */
3750 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3751 && current_frame_info.r[reg_save_ar_unat] == 0)
3753 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3754 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3755 cfa_off -= 8;
3758 /* The alloc insn already copied ar.pfs into a general register. The
3759 only thing we have to do now is copy that register to a stack slot
3760 if we'd not allocated a local register for the job. */
3761 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3762 && current_frame_info.r[reg_save_ar_pfs] == 0)
3764 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3765 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3766 cfa_off -= 8;
3769 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3771 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3772 if (current_frame_info.r[reg_save_ar_lc] != 0)
3774 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3775 reg_emitted (reg_save_ar_lc);
3776 insn = emit_move_insn (alt_reg, reg);
3777 RTX_FRAME_RELATED_P (insn) = 1;
3778 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3780 /* Even if we're not going to generate an epilogue, we still
3781 need to save the register so that EH works. */
3782 if (! epilogue_p)
3783 emit_insn (gen_prologue_use (alt_reg));
3785 else
3787 alt_regno = next_scratch_gr_reg ();
3788 alt_reg = gen_rtx_REG (DImode, alt_regno);
3789 emit_move_insn (alt_reg, reg);
3790 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3791 cfa_off -= 8;
3795 /* Save the return pointer. */
3796 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3798 reg = gen_rtx_REG (DImode, BR_REG (0));
3799 if (current_frame_info.r[reg_save_b0] != 0)
3801 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3802 reg_emitted (reg_save_b0);
3803 insn = emit_move_insn (alt_reg, reg);
3804 RTX_FRAME_RELATED_P (insn) = 1;
3805 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3807 /* Even if we're not going to generate an epilogue, we still
3808 need to save the register so that EH works. */
3809 if (! epilogue_p)
3810 emit_insn (gen_prologue_use (alt_reg));
3812 else
3814 alt_regno = next_scratch_gr_reg ();
3815 alt_reg = gen_rtx_REG (DImode, alt_regno);
3816 emit_move_insn (alt_reg, reg);
3817 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3818 cfa_off -= 8;
3822 if (current_frame_info.r[reg_save_gp])
3824 reg_emitted (reg_save_gp);
3825 insn = emit_move_insn (gen_rtx_REG (DImode,
3826 current_frame_info.r[reg_save_gp]),
3827 pic_offset_table_rtx);
3830 /* We should now be at the base of the gr/br/fr spill area. */
3831 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3832 + current_frame_info.spill_size));
3834 /* Spill all general registers. */
3835 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3836 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3838 reg = gen_rtx_REG (DImode, regno);
3839 do_spill (gen_gr_spill, reg, cfa_off, reg);
3840 cfa_off -= 8;
3843 /* Spill the rest of the BR registers. */
3844 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3845 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3847 alt_regno = next_scratch_gr_reg ();
3848 alt_reg = gen_rtx_REG (DImode, alt_regno);
3849 reg = gen_rtx_REG (DImode, regno);
3850 emit_move_insn (alt_reg, reg);
3851 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3852 cfa_off -= 8;
3855 /* Align the frame and spill all FR registers. */
3856 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3857 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3859 gcc_assert (!(cfa_off & 15));
3860 reg = gen_rtx_REG (XFmode, regno);
3861 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3862 cfa_off -= 16;
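/* For illustration: FR registers are spilled into 16-byte slots
   (an stf.spill at the assembly level), hence the 16-byte alignment
   check and the decrement by 16 above.  */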
3865 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3867 finish_spill_pointers ();
3870 /* Output the textual info surrounding the prologue. */
3872 void
3873 ia64_start_function (FILE *file, const char *fnname,
3874 tree decl ATTRIBUTE_UNUSED)
3876 #if TARGET_ABI_OPEN_VMS
3877 vms_start_function (fnname);
3878 #endif
3880 fputs ("\t.proc ", file);
3881 assemble_name (file, fnname);
3882 fputc ('\n', file);
3883 ASM_OUTPUT_LABEL (file, fnname);
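/* For illustration: for a function named `foo' the code above emits

	.proc foo
   foo:

   preceded by the VMS-specific prologue when TARGET_ABI_OPEN_VMS.  */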
3886 /* Called after register allocation to add any instructions needed for the
3887 epilogue. Using an epilogue insn is favored compared to putting all of the
3888 instructions in output_function_epilogue(), since it allows the scheduler
3889 to intermix instructions with the restores of the caller-saved registers. In
3890 some cases, it might be necessary to emit a barrier instruction as the last
3891 insn to prevent such scheduling. */
3893 void
3894 ia64_expand_epilogue (int sibcall_p)
3896 rtx_insn *insn;
3897 rtx reg, alt_reg, ar_unat_save_reg;
3898 int regno, alt_regno, cfa_off;
3900 ia64_compute_frame_size (get_frame_size ());
3902 /* If there is a frame pointer, then we use it instead of the stack
3903 pointer, so that the stack pointer does not need to be valid when
3904 the epilogue starts. See EXIT_IGNORE_STACK. */
3905 if (frame_pointer_needed)
3906 setup_spill_pointers (current_frame_info.n_spilled,
3907 hard_frame_pointer_rtx, 0);
3908 else
3909 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3910 current_frame_info.total_size);
3912 if (current_frame_info.total_size != 0)
3914 /* ??? At this point we must generate a magic insn that appears to
3915 modify the spill iterators and the frame pointer. This would
3916 allow the most scheduling freedom. For now, just hard stop. */
3917 emit_insn (gen_blockage ());
3920 /* Locate the bottom of the register save area. */
3921 cfa_off = (current_frame_info.spill_cfa_off
3922 + current_frame_info.spill_size
3923 + current_frame_info.extra_spill_size);
3925 /* Restore the predicate registers. */
3926 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3928 if (current_frame_info.r[reg_save_pr] != 0)
3930 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3931 reg_emitted (reg_save_pr);
3933 else
3935 alt_regno = next_scratch_gr_reg ();
3936 alt_reg = gen_rtx_REG (DImode, alt_regno);
3937 do_restore (gen_movdi_x, alt_reg, cfa_off);
3938 cfa_off -= 8;
3940 reg = gen_rtx_REG (DImode, PR_REG (0));
3941 emit_move_insn (reg, alt_reg);
3944 /* Restore the application registers. */
3946 /* Load the saved unat from the stack, but do not restore it until
3947 after the GRs have been restored. */
3948 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3950 if (current_frame_info.r[reg_save_ar_unat] != 0)
3952 ar_unat_save_reg
3953 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3954 reg_emitted (reg_save_ar_unat);
3956 else
3958 alt_regno = next_scratch_gr_reg ();
3959 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3960 current_frame_info.gr_used_mask |= 1 << alt_regno;
3961 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3962 cfa_off -= 8;
3965 else
3966 ar_unat_save_reg = NULL_RTX;
3968 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3970 reg_emitted (reg_save_ar_pfs);
3971 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3972 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3973 emit_move_insn (reg, alt_reg);
3975 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3977 alt_regno = next_scratch_gr_reg ();
3978 alt_reg = gen_rtx_REG (DImode, alt_regno);
3979 do_restore (gen_movdi_x, alt_reg, cfa_off);
3980 cfa_off -= 8;
3981 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3982 emit_move_insn (reg, alt_reg);
3985 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3987 if (current_frame_info.r[reg_save_ar_lc] != 0)
3989 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3990 reg_emitted (reg_save_ar_lc);
3992 else
3994 alt_regno = next_scratch_gr_reg ();
3995 alt_reg = gen_rtx_REG (DImode, alt_regno);
3996 do_restore (gen_movdi_x, alt_reg, cfa_off);
3997 cfa_off -= 8;
3999 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4000 emit_move_insn (reg, alt_reg);
4003 /* Restore the return pointer. */
4004 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4006 if (current_frame_info.r[reg_save_b0] != 0)
4008 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4009 reg_emitted (reg_save_b0);
4011 else
4013 alt_regno = next_scratch_gr_reg ();
4014 alt_reg = gen_rtx_REG (DImode, alt_regno);
4015 do_restore (gen_movdi_x, alt_reg, cfa_off);
4016 cfa_off -= 8;
4018 reg = gen_rtx_REG (DImode, BR_REG (0));
4019 emit_move_insn (reg, alt_reg);
4022 /* We should now be at the base of the gr/br/fr spill area. */
4023 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4024 + current_frame_info.spill_size));
4026 /* The GP may be stored on the stack in the prologue, but it's
4027 never restored in the epilogue. Skip the stack slot. */
4028 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4029 cfa_off -= 8;
4031 /* Restore all general registers. */
4032 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4033 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4035 reg = gen_rtx_REG (DImode, regno);
4036 do_restore (gen_gr_restore, reg, cfa_off);
4037 cfa_off -= 8;
4040 /* Restore the branch registers. */
4041 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4042 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4044 alt_regno = next_scratch_gr_reg ();
4045 alt_reg = gen_rtx_REG (DImode, alt_regno);
4046 do_restore (gen_movdi_x, alt_reg, cfa_off);
4047 cfa_off -= 8;
4048 reg = gen_rtx_REG (DImode, regno);
4049 emit_move_insn (reg, alt_reg);
4052 /* Restore floating point registers. */
4053 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4054 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4056 gcc_assert (!(cfa_off & 15));
4057 reg = gen_rtx_REG (XFmode, regno);
4058 do_restore (gen_fr_restore_x, reg, cfa_off);
4059 cfa_off -= 16;
4062 /* Restore ar.unat for real. */
4063 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4065 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4066 emit_move_insn (reg, ar_unat_save_reg);
4069 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4071 finish_spill_pointers ();
4073 if (current_frame_info.total_size
4074 || cfun->machine->ia64_eh_epilogue_sp
4075 || frame_pointer_needed)
4077 /* ??? At this point we must generate a magic insn that appears to
4078 modify the spill iterators, the stack pointer, and the frame
4079 pointer. This would allow the most scheduling freedom. For now,
4080 just hard stop. */
4081 emit_insn (gen_blockage ());
4084 if (cfun->machine->ia64_eh_epilogue_sp)
4085 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4086 else if (frame_pointer_needed)
4088 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4089 RTX_FRAME_RELATED_P (insn) = 1;
4090 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4092 else if (current_frame_info.total_size)
4094 rtx offset, frame_size_rtx;
4096 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4097 if (satisfies_constraint_I (frame_size_rtx))
4098 offset = frame_size_rtx;
4099 else
4101 regno = next_scratch_gr_reg ();
4102 offset = gen_rtx_REG (DImode, regno);
4103 emit_move_insn (offset, frame_size_rtx);
4106 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4107 offset));
4109 RTX_FRAME_RELATED_P (insn) = 1;
4110 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4111 gen_rtx_SET (stack_pointer_rtx,
4112 gen_rtx_PLUS (DImode,
4113 stack_pointer_rtx,
4114 frame_size_rtx)));
4117 if (cfun->machine->ia64_eh_epilogue_bsp)
4118 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4120 if (! sibcall_p)
4121 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4122 else
4124 int fp = GR_REG (2);
4125 /* We need a throwaway register here; r0 and r1 are reserved,
4126 so r2 is the first available call-clobbered register. If
4127 there was a frame_pointer register, we may have swapped the
4128 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4129 sure we're using the string "r2" when emitting the register
4130 name for the assembler. */
4131 if (current_frame_info.r[reg_fp]
4132 && current_frame_info.r[reg_fp] == GR_REG (2))
4133 fp = HARD_FRAME_POINTER_REGNUM;
4135 /* We must emit an alloc to force the input registers to become output
4136 registers. Otherwise, if the callee tries to pass its parameters
4137 through to another call without an intervening alloc, then these
4138 values get lost. */
4139 /* ??? We don't need to preserve all input registers. We only need to
4140 preserve those input registers used as arguments to the sibling call.
4141 It is unclear how to compute that number here. */
4142 if (current_frame_info.n_input_regs != 0)
4144 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4146 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4147 const0_rtx, const0_rtx,
4148 n_inputs, const0_rtx));
4149 RTX_FRAME_RELATED_P (insn) = 1;
4151 /* ??? We need to mark the alloc as frame-related so that it gets
4152 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4153 But there's nothing dwarf2 related to be done wrt the register
4154 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4155 the empty parallel means dwarf2out will not see anything. */
4156 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4157 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4162 /* Return 1 if br.ret can do all the work required to return from a
4163 function. */
4166 ia64_direct_return (void)
4168 if (reload_completed && ! frame_pointer_needed)
4170 ia64_compute_frame_size (get_frame_size ());
4172 return (current_frame_info.total_size == 0
4173 && current_frame_info.n_spilled == 0
4174 && current_frame_info.r[reg_save_b0] == 0
4175 && current_frame_info.r[reg_save_pr] == 0
4176 && current_frame_info.r[reg_save_ar_pfs] == 0
4177 && current_frame_info.r[reg_save_ar_unat] == 0
4178 && current_frame_info.r[reg_save_ar_lc] == 0);
4180 return 0;
4183 /* Return the magic cookie that we use to hold the return address
4184 during early compilation. */
4187 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4189 if (count != 0)
4190 return NULL;
4191 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4194 /* Split this value after reload, now that we know where the return
4195 address is saved. */
4197 void
4198 ia64_split_return_addr_rtx (rtx dest)
4200 rtx src;
4202 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4204 if (current_frame_info.r[reg_save_b0] != 0)
4206 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4207 reg_emitted (reg_save_b0);
4209 else
4211 HOST_WIDE_INT off;
4212 unsigned int regno;
4213 rtx off_r;
4215 /* Compute offset from CFA for BR0. */
4216 /* ??? Must be kept in sync with ia64_expand_prologue. */
4217 off = (current_frame_info.spill_cfa_off
4218 + current_frame_info.spill_size);
4219 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4220 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4221 off -= 8;
4223 /* Convert CFA offset to a register based offset. */
4224 if (frame_pointer_needed)
4225 src = hard_frame_pointer_rtx;
4226 else
4228 src = stack_pointer_rtx;
4229 off += current_frame_info.total_size;
4232 /* Load address into scratch register. */
4233 off_r = GEN_INT (off);
4234 if (satisfies_constraint_I (off_r))
4235 emit_insn (gen_adddi3 (dest, src, off_r));
4236 else
4238 emit_move_insn (dest, off_r);
4239 emit_insn (gen_adddi3 (dest, src, dest));
4242 src = gen_rtx_MEM (Pmode, dest);
4245 else
4246 src = gen_rtx_REG (DImode, BR_REG (0));
4248 emit_move_insn (dest, src);
4252 ia64_hard_regno_rename_ok (int from, int to)
4254 /* Don't clobber any of the registers we reserved for the prologue. */
4255 unsigned int r;
4257 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4258 if (to == current_frame_info.r[r]
4259 || from == current_frame_info.r[r]
4260 || to == emitted_frame_related_regs[r]
4261 || from == emitted_frame_related_regs[r])
4262 return 0;
4264 /* Don't use output registers outside the register frame. */
4265 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4266 return 0;
4268 /* Retain even/oddness on predicate register pairs. */
4269 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4270 return (from & 1) == (to & 1);
4272 return 1;
4275 /* Implement TARGET_HARD_REGNO_NREGS.
4277 ??? We say that BImode PR values require two registers. This allows us to
4278 easily store the normal and inverted values. We use CCImode to indicate
4279 a single predicate register. */
4281 static unsigned int
4282 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4284 if (regno == PR_REG (0) && mode == DImode)
4285 return 64;
4286 if (PR_REGNO_P (regno) && (mode) == BImode)
4287 return 2;
4288 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4289 return 1;
4290 if (FR_REGNO_P (regno) && mode == XFmode)
4291 return 1;
4292 if (FR_REGNO_P (regno) && mode == RFmode)
4293 return 1;
4294 if (FR_REGNO_P (regno) && mode == XCmode)
4295 return 2;
4296 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
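/* For illustration: TImode (16 bytes) in a general register needs
   CEIL (16, 8) == 2 hard registers, while XFmode or RFmode held in an
   FR register needs only 1 because of the early returns above.  */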
4299 /* Implement TARGET_HARD_REGNO_MODE_OK. */
4301 static bool
4302 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4304 if (FR_REGNO_P (regno))
4305 return (GET_MODE_CLASS (mode) != MODE_CC
4306 && mode != BImode
4307 && mode != TFmode);
4309 if (PR_REGNO_P (regno))
4310 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4312 if (GR_REGNO_P (regno))
4313 return mode != XFmode && mode != XCmode && mode != RFmode;
4315 if (AR_REGNO_P (regno))
4316 return mode == DImode;
4318 if (BR_REGNO_P (regno))
4319 return mode == DImode;
4321 return false;
4324 /* Implement TARGET_MODES_TIEABLE_P.
4326 Don't tie integer and FP modes, as that causes us to get integer registers
4327 allocated for FP instructions. XFmode is only supported in FP registers, so
4328 we can't tie it with any other modes. */
4330 static bool
4331 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4333 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4334 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4335 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4336 && (mode1 == BImode) == (mode2 == BImode));
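/* For illustration: DImode and SImode are tieable (same class, neither
   in the XF/XC/RF group, neither BImode), but DFmode and XFmode are not,
   since only one of the pair is in the XF/XC/RF group.  */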
4339 /* Target hook for assembling integer objects. Handle word-sized
4340 aligned objects and detect the cases when @fptr is needed. */
4342 static bool
4343 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4345 if (size == POINTER_SIZE / BITS_PER_UNIT
4346 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4347 && GET_CODE (x) == SYMBOL_REF
4348 && SYMBOL_REF_FUNCTION_P (x))
4350 static const char * const directive[2][2] = {
4351 /* 64-bit pointer */ /* 32-bit pointer */
4352 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4353 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4355 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4356 output_addr_const (asm_out_file, x);
4357 fputs (")\n", asm_out_file);
4358 return true;
4360 return default_assemble_integer (x, size, aligned_p);
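/* For illustration: for an aligned 64-bit pointer to a function `foo'
   in PIC code, this hook emits

	data8	@fptr(foo)

   so that the address of a function descriptor, not the raw code
   address, ends up in the data.  */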
4363 /* Emit the function prologue. */
4365 static void
4366 ia64_output_function_prologue (FILE *file)
4368 int mask, grsave, grsave_prev;
4370 if (current_frame_info.need_regstk)
4371 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4372 current_frame_info.n_input_regs,
4373 current_frame_info.n_local_regs,
4374 current_frame_info.n_output_regs,
4375 current_frame_info.n_rotate_regs);
4377 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4378 return;
4380 /* Emit the .prologue directive. */
4382 mask = 0;
4383 grsave = grsave_prev = 0;
4384 if (current_frame_info.r[reg_save_b0] != 0)
4386 mask |= 8;
4387 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4389 if (current_frame_info.r[reg_save_ar_pfs] != 0
4390 && (grsave_prev == 0
4391 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4393 mask |= 4;
4394 if (grsave_prev == 0)
4395 grsave = current_frame_info.r[reg_save_ar_pfs];
4396 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4398 if (current_frame_info.r[reg_fp] != 0
4399 && (grsave_prev == 0
4400 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4402 mask |= 2;
4403 if (grsave_prev == 0)
4404 grsave = HARD_FRAME_POINTER_REGNUM;
4405 grsave_prev = current_frame_info.r[reg_fp];
4407 if (current_frame_info.r[reg_save_pr] != 0
4408 && (grsave_prev == 0
4409 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4411 mask |= 1;
4412 if (grsave_prev == 0)
4413 grsave = current_frame_info.r[reg_save_pr];
4416 if (mask && TARGET_GNU_AS)
4417 fprintf (file, "\t.prologue %d, %d\n", mask,
4418 ia64_dbx_register_number (grsave));
4419 else
4420 fputs ("\t.prologue\n", file);
4422 /* Emit a .spill directive, if necessary, to relocate the base of
4423 the register spill area. */
4424 if (current_frame_info.spill_cfa_off != -16)
4425 fprintf (file, "\t.spill %ld\n",
4426 (long) (current_frame_info.spill_cfa_off
4427 + current_frame_info.spill_size));
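/* For illustration: a frame that saves b0 and then ar.pfs in two
   consecutive general registers sets mask to 8|4 == 12, so with the GNU
   assembler the directive reads something like

	.prologue 12, 35

   where 35 is the debug number of the first save register.  */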
4430 /* Emit the .body directive at the scheduled end of the prologue. */
4432 static void
4433 ia64_output_function_end_prologue (FILE *file)
4435 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4436 return;
4438 fputs ("\t.body\n", file);
4441 /* Emit the function epilogue. */
4443 static void
4444 ia64_output_function_epilogue (FILE *)
4446 int i;
4448 if (current_frame_info.r[reg_fp])
4450 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4451 reg_names[HARD_FRAME_POINTER_REGNUM]
4452 = reg_names[current_frame_info.r[reg_fp]];
4453 reg_names[current_frame_info.r[reg_fp]] = tmp;
4454 reg_emitted (reg_fp);
4456 if (! TARGET_REG_NAMES)
4458 for (i = 0; i < current_frame_info.n_input_regs; i++)
4459 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4460 for (i = 0; i < current_frame_info.n_local_regs; i++)
4461 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4462 for (i = 0; i < current_frame_info.n_output_regs; i++)
4463 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4466 current_frame_info.initialized = 0;
4470 ia64_dbx_register_number (int regno)
4472 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4473 from its home at loc79 to something inside the register frame. We
4474 must perform the same renumbering here for the debug info. */
4475 if (current_frame_info.r[reg_fp])
4477 if (regno == HARD_FRAME_POINTER_REGNUM)
4478 regno = current_frame_info.r[reg_fp];
4479 else if (regno == current_frame_info.r[reg_fp])
4480 regno = HARD_FRAME_POINTER_REGNUM;
4483 if (IN_REGNO_P (regno))
4484 return 32 + regno - IN_REG (0);
4485 else if (LOC_REGNO_P (regno))
4486 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4487 else if (OUT_REGNO_P (regno))
4488 return (32 + current_frame_info.n_input_regs
4489 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4490 else
4491 return regno;
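/* For illustration: with 2 input and 3 local registers, in1 maps to
   debug register 32 + 1 == 33, loc1 to 32 + 2 + 1 == 35, and out0 to
   32 + 2 + 3 + 0 == 37, per the computations above.  */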
4494 /* Implement TARGET_TRAMPOLINE_INIT.
4496 The trampoline should set the static chain pointer to value placed
4497 into the trampoline and should branch to the specified routine.
4498 To make the normal indirect-subroutine calling convention work,
4499 the trampoline must look like a function descriptor; the first
4500 word being the target address and the second being the target's
4501 global pointer.
4503 We abuse the concept of a global pointer by arranging for it
4504 to point to the data we need to load. The complete trampoline
4505 has the following form:
4507 +-------------------+ \
4508 TRAMP: | __ia64_trampoline | |
4509 +-------------------+ > fake function descriptor
4510 | TRAMP+16 | |
4511 +-------------------+ /
4512 | target descriptor |
4513 +-------------------+
4514 | static link |
4515 +-------------------+
4518 static void
4519 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4521 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4522 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4524 /* The Intel assembler requires that the global __ia64_trampoline symbol
4525 be declared explicitly. */
4526 if (!TARGET_GNU_AS)
4528 static bool declared_ia64_trampoline = false;
4530 if (!declared_ia64_trampoline)
4532 declared_ia64_trampoline = true;
4533 (*targetm.asm_out.globalize_label) (asm_out_file,
4534 "__ia64_trampoline");
4538 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4539 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4540 fnaddr = convert_memory_address (Pmode, fnaddr);
4541 static_chain = convert_memory_address (Pmode, static_chain);
4543 /* Load up our iterator. */
4544 addr_reg = copy_to_reg (addr);
4545 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4547 /* The first two words are the fake descriptor:
4548 __ia64_trampoline, ADDR+16. */
4549 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4550 if (TARGET_ABI_OPEN_VMS)
4552 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4553 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4554 relocation against function symbols to make it identical to the
4555 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4556 strict ELF and dereference to get the bare code address. */
4557 rtx reg = gen_reg_rtx (Pmode);
4558 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4559 emit_move_insn (reg, tramp);
4560 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4561 tramp = reg;
4563 emit_move_insn (m_tramp, tramp);
4564 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4565 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4567 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4568 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4569 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4571 /* The third word is the target descriptor. */
4572 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4573 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4574 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4576 /* The fourth word is the static chain. */
4577 emit_move_insn (m_tramp, static_chain);
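/* For illustration: the four 8-byte stores above fill the 32-byte
   trampoline exactly as drawn in the diagram before this function:
   the fake descriptor (__ia64_trampoline, TRAMP+16), then the target
   descriptor, then the static link.  */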
4580 /* Do any needed setup for a variadic function. CUM has not been updated
4581 for the last named argument, which has type TYPE and mode MODE.
4583 We generate the actual spill instructions during prologue generation. */
4585 static void
4586 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4587 tree type, int * pretend_size,
4588 int second_time ATTRIBUTE_UNUSED)
4590 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4592 /* Skip the current argument. */
4593 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4595 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4597 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4598 *pretend_size = n * UNITS_PER_WORD;
4599 cfun->machine->n_varargs = n;
4603 /* Check whether TYPE is a homogeneous floating point aggregate. If
4604 it is, return the mode of the floating point type that appears
4605 in all leaves. If it is not, return VOIDmode.
4607 An aggregate is a homogeneous floating point aggregate if all
4608 fields/elements in it have the same floating point type (e.g.,
4609 SFmode). 128-bit quad-precision floats are excluded.
4611 Variable sized aggregates should never arrive here, since we should
4612 have already decided to pass them by reference. Top-level zero-sized
4613 aggregates are excluded because our parallels crash the middle-end. */
4615 static machine_mode
4616 hfa_element_mode (const_tree type, bool nested)
4618 machine_mode element_mode = VOIDmode;
4619 machine_mode mode;
4620 enum tree_code code = TREE_CODE (type);
4621 int know_element_mode = 0;
4622 tree t;
4624 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4625 return VOIDmode;
4627 switch (code)
4629 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4630 case BOOLEAN_TYPE: case POINTER_TYPE:
4631 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4632 case LANG_TYPE: case FUNCTION_TYPE:
4633 return VOIDmode;
4635 /* Fortran complex types are supposed to be HFAs, so we need to handle
4636 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4637 types though. */
4638 case COMPLEX_TYPE:
4639 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4640 && TYPE_MODE (type) != TCmode)
4641 return GET_MODE_INNER (TYPE_MODE (type));
4642 else
4643 return VOIDmode;
4645 case REAL_TYPE:
4646 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4647 mode if this is contained within an aggregate. */
4648 if (nested && TYPE_MODE (type) != TFmode)
4649 return TYPE_MODE (type);
4650 else
4651 return VOIDmode;
4653 case ARRAY_TYPE:
4654 return hfa_element_mode (TREE_TYPE (type), 1);
4656 case RECORD_TYPE:
4657 case UNION_TYPE:
4658 case QUAL_UNION_TYPE:
4659 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4661 if (TREE_CODE (t) != FIELD_DECL)
4662 continue;
4664 mode = hfa_element_mode (TREE_TYPE (t), 1);
4665 if (know_element_mode)
4667 if (mode != element_mode)
4668 return VOIDmode;
4670 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4671 return VOIDmode;
4672 else
4674 know_element_mode = 1;
4675 element_mode = mode;
4678 return element_mode;
4680 default:
4681 /* If we reach here, we probably have some front-end specific type
4682 that the backend doesn't know about. This can happen via the
4683 aggregate_value_p call in init_function_start. All we can do is
4684 ignore unknown tree types. */
4685 return VOIDmode;
4688 return VOIDmode;
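/* For illustration: `struct { float x, y, z; }' yields SFmode,
   `_Complex double' yields DFmode, and `struct { float x; double y; }'
   yields VOIDmode because the element modes differ.  */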
4691 /* Return the number of words required to hold a quantity of TYPE and MODE
4692 when passed as an argument. */
4693 static int
4694 ia64_function_arg_words (const_tree type, machine_mode mode)
4696 int words;
4698 if (mode == BLKmode)
4699 words = int_size_in_bytes (type);
4700 else
4701 words = GET_MODE_SIZE (mode);
4703 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
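/* For illustration (UNITS_PER_WORD is 8 here): a 12-byte BLKmode
   aggregate needs (12 + 7) / 8 == 2 argument slots, while any scalar of
   8 bytes or fewer needs 1.  */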
4706 /* Return the number of registers that should be skipped so the current
4707 argument (described by TYPE and WORDS) will be properly aligned.
4709 Integer and float arguments larger than 8 bytes start at the next
4710 even boundary. Aggregates larger than 8 bytes start at the next
4711 even boundary if the aggregate has 16 byte alignment. Note that
4712 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4713 but are still to be aligned in registers.
4715 ??? The ABI does not specify how to handle aggregates with
4716 alignment from 9 to 15 bytes, or greater than 16. We handle them
4717 all as if they had 16 byte alignment. Such aggregates can occur
4718 only if gcc extensions are used. */
4719 static int
4720 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4721 const_tree type, int words)
4723 /* No registers are skipped on VMS. */
4724 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4725 return 0;
4727 if (type
4728 && TREE_CODE (type) != INTEGER_TYPE
4729 && TREE_CODE (type) != REAL_TYPE)
4730 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4731 else
4732 return words > 1;
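/* For illustration: a TImode scalar (2 words) or a 16-byte-aligned
   aggregate that would otherwise start in an odd slot is pushed to the
   next even slot, i.e. one argument register is skipped.  */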
4735 /* Return rtx for register where argument is passed, or zero if it is passed
4736 on the stack. */
4737 /* ??? 128-bit quad-precision floats are always passed in general
4738 registers. */
4740 static rtx
4741 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4742 const_tree type, bool named, bool incoming)
4744 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4746 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4747 int words = ia64_function_arg_words (type, mode);
4748 int offset = ia64_function_arg_offset (cum, type, words);
4749 machine_mode hfa_mode = VOIDmode;
4751 /* For OpenVMS, emit the instruction setting up the argument register here,
4752 where we know it will be emitted together with the other argument-setup
4753 insns. This is not conceptually the best place to do this, but it is
4754 the easiest, as we have convenient access to the cumulative args info. */
4756 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4757 && named == 1)
4759 unsigned HOST_WIDE_INT regval = cum->words;
4760 int i;
4762 for (i = 0; i < 8; i++)
4763 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4765 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4766 GEN_INT (regval));
4769 /* If all argument slots are used, then it must go on the stack. */
4770 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4771 return 0;
4773 /* On OpenVMS argument is either in Rn or Fn. */
4774 if (TARGET_ABI_OPEN_VMS)
4776 if (FLOAT_MODE_P (mode))
4777 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4778 else
4779 return gen_rtx_REG (mode, basereg + cum->words);
4782 /* Check for and handle homogeneous FP aggregates. */
4783 if (type)
4784 hfa_mode = hfa_element_mode (type, 0);
4786 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4787 and unprototyped hfas are passed specially. */
4788 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4790 rtx loc[16];
4791 int i = 0;
4792 int fp_regs = cum->fp_regs;
4793 int int_regs = cum->words + offset;
4794 int hfa_size = GET_MODE_SIZE (hfa_mode);
4795 int byte_size;
4796 int args_byte_size;
4798 /* If prototyped, pass it in FR regs then GR regs.
4799 If not prototyped, pass it in both FR and GR regs.
4801 If this is an SFmode aggregate, then it is possible to run out of
4802 FR regs while GR regs are still left. In that case, we pass the
4803 remaining part in the GR regs. */
4805 /* Fill the FP regs. We do this always. We stop if we reach the end
4806 of the argument, the last FP register, or the last argument slot. */
4808 byte_size = ((mode == BLKmode)
4809 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4810 args_byte_size = int_regs * UNITS_PER_WORD;
4811 offset = 0;
4812 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4813 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4815 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4816 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4817 + fp_regs)),
4818 GEN_INT (offset));
4819 offset += hfa_size;
4820 args_byte_size += hfa_size;
4821 fp_regs++;
4824 /* If no prototype, then the whole thing must go in GR regs. */
4825 if (! cum->prototype)
4826 offset = 0;
4827 /* If this is an SFmode aggregate, then we might have some left over
4828 that needs to go in GR regs. */
4829 else if (byte_size != offset)
4830 int_regs += offset / UNITS_PER_WORD;
4832 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4834 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4836 machine_mode gr_mode = DImode;
4837 unsigned int gr_size;
4839 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4840 then this goes in a GR reg left adjusted/little endian, right
4841 adjusted/big endian. */
4842 /* ??? Currently this is handled wrong, because 4-byte hunks are
4843 always right adjusted/little endian. */
4844 if (offset & 0x4)
4845 gr_mode = SImode;
4846 /* If we have an even 4 byte hunk because the aggregate is a
4847 multiple of 4 bytes in size, then this goes in a GR reg right
4848 adjusted/little endian. */
4849 else if (byte_size - offset == 4)
4850 gr_mode = SImode;
4852 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4853 gen_rtx_REG (gr_mode, (basereg
4854 + int_regs)),
4855 GEN_INT (offset));
4857 gr_size = GET_MODE_SIZE (gr_mode);
4858 offset += gr_size;
4859 if (gr_size == UNITS_PER_WORD
4860 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4861 int_regs++;
4862 else if (gr_size > UNITS_PER_WORD)
4863 int_regs += gr_size / UNITS_PER_WORD;
4865 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4868 /* Integral values and aggregates go in general registers. If we have run
4869 out of FR registers, then FP values must also go in general registers.
4870 This can happen when we have an SFmode HFA. */
4871 else if (mode == TFmode || mode == TCmode
4872 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4874 int byte_size = ((mode == BLKmode)
4875 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4876 if (BYTES_BIG_ENDIAN
4877 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4878 && byte_size < UNITS_PER_WORD
4879 && byte_size > 0)
4881 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4882 gen_rtx_REG (DImode,
4883 (basereg + cum->words
4884 + offset)),
4885 const0_rtx);
4886 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4888 else
4889 return gen_rtx_REG (mode, basereg + cum->words + offset);
4893 /* If there is a prototype, then FP values go in a FR register when
4894 named, and in a GR register when unnamed. */
4895 else if (cum->prototype)
4897 if (named)
4898 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4899 /* In big-endian mode, an anonymous SFmode value must be represented
4900 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4901 the value into the high half of the general register. */
4902 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4903 return gen_rtx_PARALLEL (mode,
4904 gen_rtvec (1,
4905 gen_rtx_EXPR_LIST (VOIDmode,
4906 gen_rtx_REG (DImode, basereg + cum->words + offset),
4907 const0_rtx)));
4908 else
4909 return gen_rtx_REG (mode, basereg + cum->words + offset);
4911 /* If there is no prototype, then FP values go in both FR and GR
4912 registers. */
4913 else
4915 /* See comment above. */
4916 machine_mode inner_mode =
4917 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4919 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4920 gen_rtx_REG (mode, (FR_ARG_FIRST
4921 + cum->fp_regs)),
4922 const0_rtx);
4923 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4924 gen_rtx_REG (inner_mode,
4925 (basereg + cum->words
4926 + offset)),
4927 const0_rtx);
4929 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4933 /* Implement TARGET_FUNCTION_ARG target hook. */
4935 static rtx
4936 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4937 const_tree type, bool named)
4939 return ia64_function_arg_1 (cum, mode, type, named, false);
4942 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4944 static rtx
4945 ia64_function_incoming_arg (cumulative_args_t cum,
4946 machine_mode mode,
4947 const_tree type, bool named)
4949 return ia64_function_arg_1 (cum, mode, type, named, true);
4952 /* Return the number of bytes, at the beginning of the argument, that must be
4953 put in registers. 0 if the argument is entirely in registers or entirely
4954 in memory. */
4956 static int
4957 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4958 tree type, bool named ATTRIBUTE_UNUSED)
4960 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4962 int words = ia64_function_arg_words (type, mode);
4963 int offset = ia64_function_arg_offset (cum, type, words);
4965 /* If all argument slots are used, then it must go on the stack. */
4966 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4967 return 0;
4969 /* It doesn't matter whether the argument goes in FR or GR regs. If
4970 it fits within the 8 argument slots, then it goes entirely in
4971 registers. If it extends past the last argument slot, then the rest
4972 goes on the stack. */
4974 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4975 return 0;
4977 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
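/* For illustration: with 6 slots already used and no alignment padding,
   a 32-byte aggregate (4 words) gets (8 - 6) * 8 == 16 bytes passed in
   registers and the remaining 16 bytes on the stack.  */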
4980 /* Return ivms_arg_type based on machine_mode. */
4982 static enum ivms_arg_type
4983 ia64_arg_type (machine_mode mode)
4985 switch (mode)
4987 case E_SFmode:
4988 return FS;
4989 case E_DFmode:
4990 return FT;
4991 default:
4992 return I64;
4996 /* Update CUM to point after this argument. This is patterned after
4997 ia64_function_arg. */
4999 static void
5000 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5001 const_tree type, bool named)
5003 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5004 int words = ia64_function_arg_words (type, mode);
5005 int offset = ia64_function_arg_offset (cum, type, words);
5006 machine_mode hfa_mode = VOIDmode;
5008 /* If all arg slots are already full, then there is nothing to do. */
5009 if (cum->words >= MAX_ARGUMENT_SLOTS)
5011 cum->words += words + offset;
5012 return;
5015 cum->atypes[cum->words] = ia64_arg_type (mode);
5016 cum->words += words + offset;
5018 /* On OpenVMS argument is either in Rn or Fn. */
5019 if (TARGET_ABI_OPEN_VMS)
5021 cum->int_regs = cum->words;
5022 cum->fp_regs = cum->words;
5023 return;
5026 /* Check for and handle homogeneous FP aggregates. */
5027 if (type)
5028 hfa_mode = hfa_element_mode (type, 0);
5030 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5031 and unprototyped hfas are passed specially. */
5032 if (hfa_mode != VOIDmode && (! cum->prototype || named))
5034 int fp_regs = cum->fp_regs;
5035 /* This is the original value of cum->words + offset. */
5036 int int_regs = cum->words - words;
5037 int hfa_size = GET_MODE_SIZE (hfa_mode);
5038 int byte_size;
5039 int args_byte_size;
5041 /* If prototyped, pass it in FR regs then GR regs.
5042 If not prototyped, pass it in both FR and GR regs.
5044 If this is an SFmode aggregate, then it is possible to run out of
5045 FR regs while GR regs are still left. In that case, we pass the
5046 remaining part in the GR regs. */
5048 /* Fill the FP regs. We do this always. We stop if we reach the end
5049 of the argument, the last FP register, or the last argument slot. */
5051 byte_size = ((mode == BLKmode)
5052 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5053 args_byte_size = int_regs * UNITS_PER_WORD;
5054 offset = 0;
5055 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5056 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5058 offset += hfa_size;
5059 args_byte_size += hfa_size;
5060 fp_regs++;
5063 cum->fp_regs = fp_regs;
5066 /* Integral values and aggregates go in general registers. So do TFmode FP
5067 values. If we have run out of FR registers, then other FP values must
5068 also go in general registers. This can happen when we have an SFmode HFA. */
5069 else if (mode == TFmode || mode == TCmode
5070 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5071 cum->int_regs = cum->words;
5073 /* If there is a prototype, then FP values go in a FR register when
5074 named, and in a GR register when unnamed. */
5075 else if (cum->prototype)
5077 if (! named)
5078 cum->int_regs = cum->words;
5079 else
5080 /* ??? Complex types should not reach here. */
5081 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5083 /* If there is no prototype, then FP values go in both FR and GR
5084 registers. */
5085 else
5087 /* ??? Complex types should not reach here. */
5088 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5089 cum->int_regs = cum->words;
5093 /* Arguments with alignment larger than 8 bytes start at the next even
5094 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5095 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5097 static unsigned int
5098 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5100 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5101 return PARM_BOUNDARY * 2;
5103 if (type)
5105 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5106 return PARM_BOUNDARY * 2;
5107 else
5108 return PARM_BOUNDARY;
5111 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5112 return PARM_BOUNDARY * 2;
5113 else
5114 return PARM_BOUNDARY;
5117 /* True if it is OK to do sibling call optimization for the specified
5118 call expression EXP. DECL will be the called function, or NULL if
5119 this is an indirect call. */
5120 static bool
5121 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5123 /* We can't perform a sibcall if the current function has the syscall_linkage
5124 attribute. */
5125 if (lookup_attribute ("syscall_linkage",
5126 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5127 return false;
5129 /* We must always return with our current GP. This means we can
5130 only sibcall to functions defined in the current module unless
5131 TARGET_CONST_GP is set to true. */
5132 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5136 /* Implement va_arg. */
5138 static tree
5139 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5140 gimple_seq *post_p)
5142 /* Variable sized types are passed by reference. */
5143 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5145 tree ptrtype = build_pointer_type (type);
5146 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5147 return build_va_arg_indirect_ref (addr);
5150 /* Aggregate arguments with alignment larger than 8 bytes start at
5151 the next even boundary. Integer and floating point arguments
5152 do so if they are larger than 8 bytes, whether or not their
5153 alignment is also larger than 8 bytes. */
5154 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5155 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5157 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5158 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5159 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5160 gimplify_assign (unshare_expr (valist), t, pre_p);
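/* For illustration (UNITS_PER_WORD is 8 here): the two statements just
   above compute (valist + 15) & -16, rounding the va_list pointer up to
   the next 16-byte (even slot) boundary.  */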
5163 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5166 /* Return 1 if the function return value is returned in memory. Return 0 if
5167 it is in a register. */
5169 static bool
5170 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5172 machine_mode mode;
5173 machine_mode hfa_mode;
5174 HOST_WIDE_INT byte_size;
5176 mode = TYPE_MODE (valtype);
5177 byte_size = GET_MODE_SIZE (mode);
5178 if (mode == BLKmode)
5180 byte_size = int_size_in_bytes (valtype);
5181 if (byte_size < 0)
5182 return true;
5185 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5187 hfa_mode = hfa_element_mode (valtype, 0);
5188 if (hfa_mode != VOIDmode)
5190 int hfa_size = GET_MODE_SIZE (hfa_mode);
5192 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5193 return true;
5194 else
5195 return false;
5197 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5198 return true;
5199 else
5200 return false;
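/* For illustration: an HFA of ten floats needs 40 / 4 == 10 slots, more
   than the MAX_ARGUMENT_SLOTS limit of 8, so it is returned in memory,
   while an HFA of four doubles (32 / 8 == 4) is returned in FP
   registers.  */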
5203 /* Return rtx for register that holds the function return value. */
5205 static rtx
5206 ia64_function_value (const_tree valtype,
5207 const_tree fn_decl_or_type,
5208 bool outgoing ATTRIBUTE_UNUSED)
5210 machine_mode mode;
5211 machine_mode hfa_mode;
5212 int unsignedp;
5213 const_tree func = fn_decl_or_type;
5215 if (fn_decl_or_type
5216 && !DECL_P (fn_decl_or_type))
5217 func = NULL;
5219 mode = TYPE_MODE (valtype);
5220 hfa_mode = hfa_element_mode (valtype, 0);
5222 if (hfa_mode != VOIDmode)
5224 rtx loc[8];
5225 int i;
5226 int hfa_size;
5227 int byte_size;
5228 int offset;
5230 hfa_size = GET_MODE_SIZE (hfa_mode);
5231 byte_size = ((mode == BLKmode)
5232 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5233 offset = 0;
5234 for (i = 0; offset < byte_size; i++)
5236 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5237 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5238 GEN_INT (offset));
5239 offset += hfa_size;
5241 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5243 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5244 return gen_rtx_REG (mode, FR_ARG_FIRST);
5245 else
5247 bool need_parallel = false;
5249 /* In big-endian mode, we need to manage the layout of aggregates
5250 in the registers so that we get the bits properly aligned in
5251 the highpart of the registers. */
5252 if (BYTES_BIG_ENDIAN
5253 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5254 need_parallel = true;
5256 /* Something like struct S { long double x; char a[0] } is not an
5257 HFA structure, and therefore doesn't go in fp registers. But
5258 the middle-end will give it XFmode anyway, and XFmode values
5259 don't normally fit in integer registers. So we need to smuggle
5260 the value inside a parallel. */
5261 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5262 need_parallel = true;
5264 if (need_parallel)
5266 rtx loc[8];
5267 int offset;
5268 int bytesize;
5269 int i;
5271 offset = 0;
5272 bytesize = int_size_in_bytes (valtype);
5273 /* An empty PARALLEL is invalid here, but the return value
5274 doesn't matter for empty structs. */
5275 if (bytesize == 0)
5276 return gen_rtx_REG (mode, GR_RET_FIRST);
5277 for (i = 0; offset < bytesize; i++)
5279 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5280 gen_rtx_REG (DImode,
5281 GR_RET_FIRST + i),
5282 GEN_INT (offset));
5283 offset += UNITS_PER_WORD;
5285 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5288 mode = promote_function_mode (valtype, mode, &unsignedp,
5289 func ? TREE_TYPE (func) : NULL_TREE,
5290 true);
5292 return gen_rtx_REG (mode, GR_RET_FIRST);
5296 /* Worker function for TARGET_LIBCALL_VALUE. */
5298 static rtx
5299 ia64_libcall_value (machine_mode mode,
5300 const_rtx fun ATTRIBUTE_UNUSED)
5302 return gen_rtx_REG (mode,
5303 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5304 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5305 && (mode) != TFmode)
5306 ? FR_RET_FIRST : GR_RET_FIRST));
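/* For illustration: a DFmode or SCmode libcall result comes back in
   FR_RET_FIRST (f8), while TFmode and integer results come back in
   GR_RET_FIRST (r8).  */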
5309 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5311 static bool
5312 ia64_function_value_regno_p (const unsigned int regno)
5314 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5315 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5318 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5319 We need to emit DTP-relative relocations. */
5321 static void
5322 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5324 gcc_assert (size == 4 || size == 8);
5325 if (size == 4)
5326 fputs ("\tdata4.ua\t@dtprel(", file);
5327 else
5328 fputs ("\tdata8.ua\t@dtprel(", file);
5329 output_addr_const (file, x);
5330 fputs (")", file);
5333 /* Print a memory address as an operand to reference that memory location. */
5335 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5336 also call this from ia64_print_operand for memory addresses. */
5338 static void
5339 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5340 machine_mode /*mode*/,
5341 rtx address ATTRIBUTE_UNUSED)
5345 /* Print an operand to an assembler instruction.
5346 C Swap and print a comparison operator.
5347 D Print an FP comparison operator.
5348 E Print 32 - constant, for SImode shifts as extract.
5349 e Print 64 - constant, for DImode rotates.
5350 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5351 a floating point register emitted normally.
5352 G A floating point constant.
5353 I Invert a predicate register by adding 1.
5354 J Select the proper predicate register for a condition.
5355 j Select the inverse predicate register for a condition.
5356 O Append .acq for volatile load.
5357 P Postincrement of a MEM.
5358 Q Append .rel for volatile store.
5359 R Print .s .d or nothing for a single, double or no truncation.
5360 S Shift amount for shladd instruction.
5361 T Print an 8-bit sign-extended number (K) as a 32-bit unsigned number
5362 for the Intel assembler.
5363 U Print an 8-bit sign-extended number (K) as a 64-bit unsigned number
5364 for the Intel assembler.
5365 X A pair of floating point registers.
5366 r Print register name, or constant 0 as r0. HP compatibility for
5367 Linux kernel.
5368 v Print vector constant value as an 8-byte integer value. */
5370 static void
5371 ia64_print_operand (FILE * file, rtx x, int code)
5373 const char *str;
5375 switch (code)
5377 case 0:
5378 /* Handled below. */
5379 break;
5381 case 'C':
5383 enum rtx_code c = swap_condition (GET_CODE (x));
5384 fputs (GET_RTX_NAME (c), file);
5385 return;
5388 case 'D':
5389 switch (GET_CODE (x))
5391 case NE:
5392 str = "neq";
5393 break;
5394 case UNORDERED:
5395 str = "unord";
5396 break;
5397 case ORDERED:
5398 str = "ord";
5399 break;
5400 case UNLT:
5401 str = "nge";
5402 break;
5403 case UNLE:
5404 str = "ngt";
5405 break;
5406 case UNGT:
5407 str = "nle";
5408 break;
5409 case UNGE:
5410 str = "nlt";
5411 break;
5412 case UNEQ:
5413 case LTGT:
5414 gcc_unreachable ();
5415 default:
5416 str = GET_RTX_NAME (GET_CODE (x));
5417 break;
5419 fputs (str, file);
5420 return;
5422 case 'E':
5423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5424 return;
5426 case 'e':
5427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5428 return;
5430 case 'F':
5431 if (x == CONST0_RTX (GET_MODE (x)))
5432 str = reg_names [FR_REG (0)];
5433 else if (x == CONST1_RTX (GET_MODE (x)))
5434 str = reg_names [FR_REG (1)];
5435 else
5437 gcc_assert (GET_CODE (x) == REG);
5438 str = reg_names [REGNO (x)];
5440 fputs (str, file);
5441 return;
5443 case 'G':
5445 long val[4];
5446 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5447 if (GET_MODE (x) == SFmode)
5448 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5449 else if (GET_MODE (x) == DFmode)
5450 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5451 & 0xffffffff,
5452 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5453 & 0xffffffff);
5454 else
5455 output_operand_lossage ("invalid %%G mode");
5457 return;
5459 case 'I':
5460 fputs (reg_names [REGNO (x) + 1], file);
5461 return;
5463 case 'J':
5464 case 'j':
5466 unsigned int regno = REGNO (XEXP (x, 0));
5467 if (GET_CODE (x) == EQ)
5468 regno += 1;
5469 if (code == 'j')
5470 regno ^= 1;
5471 fputs (reg_names [regno], file);
5473 return;
5475 case 'O':
5476 if (MEM_VOLATILE_P (x))
5477 fputs(".acq", file);
5478 return;
5480 case 'P':
5482 HOST_WIDE_INT value;
5484 switch (GET_CODE (XEXP (x, 0)))
5486 default:
5487 return;
5489 case POST_MODIFY:
5490 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5491 if (GET_CODE (x) == CONST_INT)
5492 value = INTVAL (x);
5493 else
5495 gcc_assert (GET_CODE (x) == REG);
5496 fprintf (file, ", %s", reg_names[REGNO (x)]);
5497 return;
5499 break;
5501 case POST_INC:
5502 value = GET_MODE_SIZE (GET_MODE (x));
5503 break;
5505 case POST_DEC:
5506 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5507 break;
5510 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5511 return;
5514 case 'Q':
5515 if (MEM_VOLATILE_P (x))
5516 fputs(".rel", file);
5517 return;
5519 case 'R':
5520 if (x == CONST0_RTX (GET_MODE (x)))
5521 fputs(".s", file);
5522 else if (x == CONST1_RTX (GET_MODE (x)))
5523 fputs(".d", file);
5524 else if (x == CONST2_RTX (GET_MODE (x)))
5526 else
5527 output_operand_lossage ("invalid %%R value");
5528 return;
5530 case 'S':
5531 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5532 return;
5534 case 'T':
5535 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5537 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5538 return;
5540 break;
5542 case 'U':
5543 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5545 const char *prefix = "0x";
5546 if (INTVAL (x) & 0x80000000)
5548 fprintf (file, "0xffffffff");
5549 prefix = "";
5551 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5552 return;
5554 break;
5556 case 'X':
5558 unsigned int regno = REGNO (x);
5559 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5561 return;
5563 case 'r':
5564 /* If this operand is the constant zero, write it as register zero.
5565 Any register, zero, or CONST_INT value is OK here. */
5566 if (GET_CODE (x) == REG)
5567 fputs (reg_names[REGNO (x)], file);
5568 else if (x == CONST0_RTX (GET_MODE (x)))
5569 fputs ("r0", file);
5570 else if (GET_CODE (x) == CONST_INT)
5571 output_addr_const (file, x);
5572 else
5573 output_operand_lossage ("invalid %%r value");
5574 return;
5576 case 'v':
5577 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5578 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5579 break;
5581 case '+':
5583 const char *which;
5585 /* For conditional branches, returns or calls, substitute
5586 sptk, dptk, dpnt, or spnt for %s. */
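	 /* Illustrative thresholds, matching the tests below: a reliable
	    probability below REG_BR_PROB_BASE / 50 (2%) yields ".spnt",
	    below 50% ".dpnt", below 98% (or any unreliable note) ".dptk",
	    and only a reliable probability of 98% or more yields ".sptk".  */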
5587 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5588 if (x)
5590 int pred_val = profile_probability::from_reg_br_prob_note
5591 (XINT (x, 0)).to_reg_br_prob_base ();
5593 	  /* Guess top and bottom 2% statically predicted.  */
5594 if (pred_val < REG_BR_PROB_BASE / 50
5595 && br_prob_note_reliable_p (x))
5596 which = ".spnt";
5597 else if (pred_val < REG_BR_PROB_BASE / 2)
5598 which = ".dpnt";
5599 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5600 || !br_prob_note_reliable_p (x))
5601 which = ".dptk";
5602 else
5603 which = ".sptk";
5605 else if (CALL_P (current_output_insn))
5606 which = ".sptk";
5607 else
5608 which = ".dptk";
5610 fputs (which, file);
5611 return;
5614 case ',':
5615 x = current_insn_predicate;
5616 if (x)
5618 unsigned int regno = REGNO (XEXP (x, 0));
5619 if (GET_CODE (x) == EQ)
5620 regno += 1;
5621 fprintf (file, "(%s) ", reg_names [regno]);
5623 return;
5625 default:
5626 output_operand_lossage ("ia64_print_operand: unknown code");
5627 return;
5630 switch (GET_CODE (x))
5632 /* This happens for the spill/restore instructions. */
5633 case POST_INC:
5634 case POST_DEC:
5635 case POST_MODIFY:
5636 x = XEXP (x, 0);
5637 /* fall through */
5639 case REG:
5640 fputs (reg_names [REGNO (x)], file);
5641 break;
5643 case MEM:
5645 rtx addr = XEXP (x, 0);
5646 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5647 addr = XEXP (addr, 0);
5648 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5649 break;
5652 default:
5653 output_addr_const (file, x);
5654 break;
5657 return;
5660 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5662 static bool
5663 ia64_print_operand_punct_valid_p (unsigned char code)
5665 return (code == '+' || code == ',');
5668 /* Compute a (partial) cost for rtx X. Return true if the complete
5669 cost has been computed, and false if subexpressions should be
5670 scanned. In either case, *TOTAL contains the cost result. */
5671 /* ??? This is incomplete. */
5673 static bool
5674 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5675 int opno ATTRIBUTE_UNUSED,
5676 int *total, bool speed ATTRIBUTE_UNUSED)
5678 int code = GET_CODE (x);
5680 switch (code)
5682 case CONST_INT:
5683 switch (outer_code)
5685 case SET:
5686 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5687 return true;
5688 case PLUS:
5689 if (satisfies_constraint_I (x))
5690 *total = 0;
5691 else if (satisfies_constraint_J (x))
5692 *total = 1;
5693 else
5694 *total = COSTS_N_INSNS (1);
5695 return true;
5696 default:
5697 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5698 *total = 0;
5699 else
5700 *total = COSTS_N_INSNS (1);
5701 return true;
5704 case CONST_DOUBLE:
5705 *total = COSTS_N_INSNS (1);
5706 return true;
5708 case CONST:
5709 case SYMBOL_REF:
5710 case LABEL_REF:
5711 *total = COSTS_N_INSNS (3);
5712 return true;
5714 case FMA:
5715 *total = COSTS_N_INSNS (4);
5716 return true;
5718 case MULT:
5719 /* For multiplies wider than HImode, we have to go to the FPU,
5720 which normally involves copies. Plus there's the latency
5721 of the multiply itself, and the latency of the instructions to
5722 transfer integer regs to FP regs. */
5723 if (FLOAT_MODE_P (mode))
5724 *total = COSTS_N_INSNS (4);
5725 else if (GET_MODE_SIZE (mode) > 2)
5726 *total = COSTS_N_INSNS (10);
5727 else
5728 *total = COSTS_N_INSNS (2);
5729 return true;
5731 case PLUS:
5732 case MINUS:
5733 if (FLOAT_MODE_P (mode))
5735 *total = COSTS_N_INSNS (4);
5736 return true;
5738 /* FALLTHRU */
5740 case ASHIFT:
5741 case ASHIFTRT:
5742 case LSHIFTRT:
5743 *total = COSTS_N_INSNS (1);
5744 return true;
5746 case DIV:
5747 case UDIV:
5748 case MOD:
5749 case UMOD:
5750 /* We make divide expensive, so that divide-by-constant will be
5751 optimized to a multiply. */
5752 *total = COSTS_N_INSNS (60);
5753 return true;
5755 default:
5756 return false;
5760 /* Calculate the cost of moving data from a register in class FROM to
5761 one in class TO, using MODE. */
5763 static int
5764 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5765 reg_class_t to)
5767 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5768 if (to == ADDL_REGS)
5769 to = GR_REGS;
5770 if (from == ADDL_REGS)
5771 from = GR_REGS;
5773 /* All costs are symmetric, so reduce cases by putting the
5774 lower number class as the destination. */
5775 if (from < to)
5777 reg_class_t tmp = to;
5778 to = from, from = tmp;
5781   /* Moving between FR and GR in XFmode must be more expensive than 2,
5782 so that we get secondary memory reloads. Between FR_REGS,
5783 we have to make this at least as expensive as memory_move_cost
5784 to avoid spectacularly poor register class preferencing. */
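  /* For example (illustrative): a GR<->FR move in XFmode is priced at
     memory_move_cost, which is 4 for these classes in ia64_memory_move_cost
     below; since that exceeds 2, reload routes the value through memory.  */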
5785 if (mode == XFmode || mode == RFmode)
5787 if (to != GR_REGS || from != GR_REGS)
5788 return memory_move_cost (mode, to, false);
5789 else
5790 return 3;
5793 switch (to)
5795 case PR_REGS:
5796 /* Moving between PR registers takes two insns. */
5797 if (from == PR_REGS)
5798 return 3;
5799 /* Moving between PR and anything but GR is impossible. */
5800 if (from != GR_REGS)
5801 return memory_move_cost (mode, to, false);
5802 break;
5804 case BR_REGS:
5805 /* Moving between BR and anything but GR is impossible. */
5806 if (from != GR_REGS && from != GR_AND_BR_REGS)
5807 return memory_move_cost (mode, to, false);
5808 break;
5810 case AR_I_REGS:
5811 case AR_M_REGS:
5812 /* Moving between AR and anything but GR is impossible. */
5813 if (from != GR_REGS)
5814 return memory_move_cost (mode, to, false);
5815 break;
5817 case GR_REGS:
5818 case FR_REGS:
5819 case FP_REGS:
5820 case GR_AND_FR_REGS:
5821 case GR_AND_BR_REGS:
5822 case ALL_REGS:
5823 break;
5825 default:
5826 gcc_unreachable ();
5829 return 2;
5832 /* Calculate the cost of moving data of MODE from a register to or from
5833 memory. */
5835 static int
5836 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5837 reg_class_t rclass,
5838 bool in ATTRIBUTE_UNUSED)
5840 if (rclass == GENERAL_REGS
5841 || rclass == FR_REGS
5842 || rclass == FP_REGS
5843 || rclass == GR_AND_FR_REGS)
5844 return 4;
5845 else
5846 return 10;
5849 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5850 on RCLASS to use when copying X into that class. */
5852 static reg_class_t
5853 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5855 switch (rclass)
5857 case FR_REGS:
5858 case FP_REGS:
5859 /* Don't allow volatile mem reloads into floating point registers.
5860 This is defined to force reload to choose the r/m case instead
5861 of the f/f case when reloading (set (reg fX) (mem/v)). */
5862 if (MEM_P (x) && MEM_VOLATILE_P (x))
5863 return NO_REGS;
5865 /* Force all unrecognized constants into the constant pool. */
5866 if (CONSTANT_P (x))
5867 return NO_REGS;
5868 break;
5870 case AR_M_REGS:
5871 case AR_I_REGS:
5872 if (!OBJECT_P (x))
5873 return NO_REGS;
5874 break;
5876 default:
5877 break;
5880 return rclass;
5883 /* This function returns the register class required for a secondary
5884 register when copying between one of the registers in RCLASS, and X,
5885 using MODE. A return value of NO_REGS means that no secondary register
5886 is required. */
5888 enum reg_class
5889 ia64_secondary_reload_class (enum reg_class rclass,
5890 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5892 int regno = -1;
5894 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5895 regno = true_regnum (x);
5897 switch (rclass)
5899 case BR_REGS:
5900 case AR_M_REGS:
5901 case AR_I_REGS:
5902 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5903 interaction. We end up with two pseudos with overlapping lifetimes
5904 	 both of which are equiv to the same constant, and both of which need
5905 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5906 changes depending on the path length, which means the qty_first_reg
5907 check in make_regs_eqv can give different answers at different times.
5908 At some point I'll probably need a reload_indi pattern to handle
5909 this.
5911 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5912 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5913 non-general registers for good measure. */
5914 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5915 return GR_REGS;
5917 /* This is needed if a pseudo used as a call_operand gets spilled to a
5918 stack slot. */
5919 if (GET_CODE (x) == MEM)
5920 return GR_REGS;
5921 break;
5923 case FR_REGS:
5924 case FP_REGS:
5925 /* Need to go through general registers to get to other class regs. */
5926 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5927 return GR_REGS;
5929 /* This can happen when a paradoxical subreg is an operand to the
5930 muldi3 pattern. */
5931 /* ??? This shouldn't be necessary after instruction scheduling is
5932 enabled, because paradoxical subregs are not accepted by
5933 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5934 stop the paradoxical subreg stupidity in the *_operand functions
5935 in recog.c. */
5936 if (GET_CODE (x) == MEM
5937 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5938 || GET_MODE (x) == QImode))
5939 return GR_REGS;
5941 /* This can happen because of the ior/and/etc patterns that accept FP
5942 registers as operands. If the third operand is a constant, then it
5943 needs to be reloaded into a FP register. */
5944 if (GET_CODE (x) == CONST_INT)
5945 return GR_REGS;
5947 /* This can happen because of register elimination in a muldi3 insn.
5948 E.g. `26107 * (unsigned long)&u'. */
5949 if (GET_CODE (x) == PLUS)
5950 return GR_REGS;
5951 break;
5953 case PR_REGS:
5954 /* ??? This happens if we cse/gcse a BImode value across a call,
5955 and the function has a nonlocal goto. This is because global
5956 does not allocate call crossing pseudos to hard registers when
5957 crtl->has_nonlocal_goto is true. This is relatively
5958 common for C++ programs that use exceptions. To reproduce,
5959 return NO_REGS and compile libstdc++. */
5960 if (GET_CODE (x) == MEM)
5961 return GR_REGS;
5963 /* This can happen when we take a BImode subreg of a DImode value,
5964 and that DImode value winds up in some non-GR register. */
5965 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5966 return GR_REGS;
5967 break;
5969 default:
5970 break;
5973 return NO_REGS;
5977 /* Implement targetm.unspec_may_trap_p hook. */
5978 static int
5979 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5981 switch (XINT (x, 1))
5983 case UNSPEC_LDA:
5984 case UNSPEC_LDS:
5985 case UNSPEC_LDSA:
5986 case UNSPEC_LDCCLR:
5987 case UNSPEC_CHKACLR:
5988 case UNSPEC_CHKS:
5989 /* These unspecs are just wrappers. */
5990 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5993 return default_unspec_may_trap_p (x, flags);
5997 /* Parse the -mfixed-range= option string. */
5999 static void
6000 fix_range (const char *const_str)
6002 int i, first, last;
6003 char *str, *dash, *comma;
6005   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
6006 REG2 are either register names or register numbers. The effect
6007 of this option is to mark the registers in the range from REG1 to
6008 REG2 as ``fixed'' so they won't be used by the compiler. This is
6009 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
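  /* For example, -mfixed-range=f32-f127 marks f32 through f127 as fixed
     (and call-used); several comma-separated ranges may be given, e.g. the
     hypothetical -mfixed-range=f32-f127,f2-f5.  */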
6011 i = strlen (const_str);
6012 str = (char *) alloca (i + 1);
6013 memcpy (str, const_str, i + 1);
6015 while (1)
6017 dash = strchr (str, '-');
6018 if (!dash)
6020 warning (0, "value of -mfixed-range must have form REG1-REG2");
6021 return;
6023 *dash = '\0';
6025 comma = strchr (dash + 1, ',');
6026 if (comma)
6027 *comma = '\0';
6029 first = decode_reg_name (str);
6030 if (first < 0)
6032 warning (0, "unknown register name: %s", str);
6033 return;
6036 last = decode_reg_name (dash + 1);
6037 if (last < 0)
6039 warning (0, "unknown register name: %s", dash + 1);
6040 return;
6043 *dash = '-';
6045 if (first > last)
6047 warning (0, "%s-%s is an empty range", str, dash + 1);
6048 return;
6051 for (i = first; i <= last; ++i)
6052 fixed_regs[i] = call_used_regs[i] = 1;
6054 if (!comma)
6055 break;
6057 *comma = ',';
6058 str = comma + 1;
6062 /* Implement TARGET_OPTION_OVERRIDE. */
6064 static void
6065 ia64_option_override (void)
6067 unsigned int i;
6068 cl_deferred_option *opt;
6069 vec<cl_deferred_option> *v
6070 = (vec<cl_deferred_option> *) ia64_deferred_options;
6072 if (v)
6073 FOR_EACH_VEC_ELT (*v, i, opt)
6075 switch (opt->opt_index)
6077 case OPT_mfixed_range_:
6078 fix_range (opt->arg);
6079 break;
6081 default:
6082 gcc_unreachable ();
6086 if (TARGET_AUTO_PIC)
6087 target_flags |= MASK_CONST_GP;
6089   /* Numerous experiments show that IRA-based loop pressure
6090      calculation works better for RTL loop invariant motion on targets
6091      with enough (>= 32) registers.  It is an expensive optimization,
6092      so it is enabled only when optimizing for peak performance.  */
6093 if (optimize >= 3)
6094 flag_ira_loop_pressure = 1;
6097 ia64_section_threshold = (global_options_set.x_g_switch_value
6098 ? g_switch_value
6099 : IA64_DEFAULT_GVALUE);
6101 init_machine_status = ia64_init_machine_status;
6103 if (align_functions <= 0)
6104 align_functions = 64;
6105 if (align_loops <= 0)
6106 align_loops = 32;
6107 if (TARGET_ABI_OPEN_VMS)
6108 flag_no_common = 1;
6110 ia64_override_options_after_change();
6113 /* Implement targetm.override_options_after_change. */
6115 static void
6116 ia64_override_options_after_change (void)
6118 if (optimize >= 3
6119 && !global_options_set.x_flag_selective_scheduling
6120 && !global_options_set.x_flag_selective_scheduling2)
6122 flag_selective_scheduling2 = 1;
6123 flag_sel_sched_pipelining = 1;
6125 if (mflag_sched_control_spec == 2)
6127 /* Control speculation is on by default for the selective scheduler,
6128 but not for the Haifa scheduler. */
6129 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6131 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6133 	/* FIXME: remove this once breaking auto-inc insns is implemented
6134 	   as a transformation.  */
6135 flag_auto_inc_dec = 0;
6139 /* Initialize the record of emitted frame related registers. */
6141 void ia64_init_expanders (void)
6143 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6146 static struct machine_function *
6147 ia64_init_machine_status (void)
6149 return ggc_cleared_alloc<machine_function> ();
6152 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6153 static enum attr_type ia64_safe_type (rtx_insn *);
6155 static enum attr_itanium_class
6156 ia64_safe_itanium_class (rtx_insn *insn)
6158 if (recog_memoized (insn) >= 0)
6159 return get_attr_itanium_class (insn);
6160 else if (DEBUG_INSN_P (insn))
6161 return ITANIUM_CLASS_IGNORE;
6162 else
6163 return ITANIUM_CLASS_UNKNOWN;
6166 static enum attr_type
6167 ia64_safe_type (rtx_insn *insn)
6169 if (recog_memoized (insn) >= 0)
6170 return get_attr_type (insn);
6171 else
6172 return TYPE_UNKNOWN;
6175 /* The following collection of routines emit instruction group stop bits as
6176 necessary to avoid dependencies. */
6178 /* Need to track some additional registers as far as serialization is
6179 concerned so we can properly handle br.call and br.ret. We could
6180 make these registers visible to gcc, but since these registers are
6181 never explicitly used in gcc generated code, it seems wasteful to
6182 do so (plus it would make the call and return patterns needlessly
6183 complex). */
6184 #define REG_RP (BR_REG (0))
6185 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6186 /* This is used for volatile asms which may require a stop bit immediately
6187 before and after them. */
6188 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6189 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6190 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6192 /* For each register, we keep track of how it has been written in the
6193 current instruction group.
6195 If a register is written unconditionally (no qualifying predicate),
6196 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6198 If a register is written if its qualifying predicate P is true, we
6199 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6200 may be written again by the complement of P (P^1) and when this happens,
6201 WRITE_COUNT gets set to 2.
6203 The result of this is that whenever an insn attempts to write a register
6204 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6206 If a predicate register is written by a floating-point insn, we set
6207 WRITTEN_BY_FP to true.
6209 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6210 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
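/* An illustrative example: within one group, "(p6) mov r14 = r15" sets
   r14's WRITE_COUNT to 1 with FIRST_PRED p6; a later "(p7) mov r14 = r16",
   with p7 the complement of p6, raises WRITE_COUNT to 2; any further write
   to r14 in the same group then needs an insn group barrier first.  */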
6212 #if GCC_VERSION >= 4000
6213 #define RWS_FIELD_TYPE __extension__ unsigned short
6214 #else
6215 #define RWS_FIELD_TYPE unsigned int
6216 #endif
6217 struct reg_write_state
6219 RWS_FIELD_TYPE write_count : 2;
6220 RWS_FIELD_TYPE first_pred : 10;
6221 RWS_FIELD_TYPE written_by_fp : 1;
6222 RWS_FIELD_TYPE written_by_and : 1;
6223 RWS_FIELD_TYPE written_by_or : 1;
6226 /* Cumulative info for the current instruction group. */
6227 struct reg_write_state rws_sum[NUM_REGS];
6228 #if CHECKING_P
6229 /* Bitmap whether a register has been written in the current insn. */
6230 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6231 / HOST_BITS_PER_WIDEST_FAST_INT];
6233 static inline void
6234 rws_insn_set (int regno)
6236 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6237 SET_HARD_REG_BIT (rws_insn, regno);
6240 static inline int
6241 rws_insn_test (int regno)
6243 return TEST_HARD_REG_BIT (rws_insn, regno);
6245 #else
6246 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6247 unsigned char rws_insn[2];
6249 static inline void
6250 rws_insn_set (int regno)
6252 if (regno == REG_AR_CFM)
6253 rws_insn[0] = 1;
6254 else if (regno == REG_VOLATILE)
6255 rws_insn[1] = 1;
6258 static inline int
6259 rws_insn_test (int regno)
6261 if (regno == REG_AR_CFM)
6262 return rws_insn[0];
6263 if (regno == REG_VOLATILE)
6264 return rws_insn[1];
6265 return 0;
6267 #endif
6269 /* Indicates whether this is the first instruction after a stop bit,
6270 in which case we don't need another stop bit. Without this,
6271 ia64_variable_issue will die when scheduling an alloc. */
6272 static int first_instruction;
6274 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6275 RTL for one instruction. */
6276 struct reg_flags
6278 unsigned int is_write : 1; /* Is register being written? */
6279 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6280 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6281 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6282 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6283 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6286 static void rws_update (int, struct reg_flags, int);
6287 static int rws_access_regno (int, struct reg_flags, int);
6288 static int rws_access_reg (rtx, struct reg_flags, int);
6289 static void update_set_flags (rtx, struct reg_flags *);
6290 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6291 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6292 static void init_insn_group_barriers (void);
6293 static int group_barrier_needed (rtx_insn *);
6294 static int safe_group_barrier_needed (rtx_insn *);
6295 static int in_safe_group_barrier;
6297 /* Update *RWS for REGNO, which is being written by the current instruction,
6298 with predicate PRED, and associated register flags in FLAGS. */
6300 static void
6301 rws_update (int regno, struct reg_flags flags, int pred)
6303 if (pred)
6304 rws_sum[regno].write_count++;
6305 else
6306 rws_sum[regno].write_count = 2;
6307 rws_sum[regno].written_by_fp |= flags.is_fp;
6308 /* ??? Not tracking and/or across differing predicates. */
6309 rws_sum[regno].written_by_and = flags.is_and;
6310 rws_sum[regno].written_by_or = flags.is_or;
6311 rws_sum[regno].first_pred = pred;
6314 /* Handle an access to register REGNO of type FLAGS using predicate register
6315 PRED. Update rws_sum array. Return 1 if this access creates
6316 a dependency with an earlier instruction in the same group. */
6318 static int
6319 rws_access_regno (int regno, struct reg_flags flags, int pred)
6321 int need_barrier = 0;
6323 gcc_assert (regno < NUM_REGS);
6325 if (! PR_REGNO_P (regno))
6326 flags.is_and = flags.is_or = 0;
6328 if (flags.is_write)
6330 int write_count;
6332 rws_insn_set (regno);
6333 write_count = rws_sum[regno].write_count;
6335 switch (write_count)
6337 case 0:
6338 /* The register has not been written yet. */
6339 if (!in_safe_group_barrier)
6340 rws_update (regno, flags, pred);
6341 break;
6343 case 1:
6344 	  /* The register has been written via a predicate.  Treat
6345 	     it like an unconditional write and do not try to check
6346 	     for a complementary pred reg in an earlier write.  */
6347 if (flags.is_and && rws_sum[regno].written_by_and)
6349 else if (flags.is_or && rws_sum[regno].written_by_or)
6351 else
6352 need_barrier = 1;
6353 if (!in_safe_group_barrier)
6354 rws_update (regno, flags, pred);
6355 break;
6357 case 2:
6358 /* The register has been unconditionally written already. We
6359 need a barrier. */
6360 if (flags.is_and && rws_sum[regno].written_by_and)
6362 else if (flags.is_or && rws_sum[regno].written_by_or)
6364 else
6365 need_barrier = 1;
6366 if (!in_safe_group_barrier)
6368 rws_sum[regno].written_by_and = flags.is_and;
6369 rws_sum[regno].written_by_or = flags.is_or;
6371 break;
6373 default:
6374 gcc_unreachable ();
6377 else
6379 if (flags.is_branch)
6381 	  /* Branches have several RAW exceptions that allow us to avoid
6382 	     barriers.  */
6384 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6385 /* RAW dependencies on branch regs are permissible as long
6386 as the writer is a non-branch instruction. Since we
6387 never generate code that uses a branch register written
6388 by a branch instruction, handling this case is
6389 easy. */
6390 return 0;
6392 if (REGNO_REG_CLASS (regno) == PR_REGS
6393 && ! rws_sum[regno].written_by_fp)
6394 /* The predicates of a branch are available within the
6395 same insn group as long as the predicate was written by
6396 something other than a floating-point instruction. */
6397 return 0;
6400 if (flags.is_and && rws_sum[regno].written_by_and)
6401 return 0;
6402 if (flags.is_or && rws_sum[regno].written_by_or)
6403 return 0;
6405 switch (rws_sum[regno].write_count)
6407 case 0:
6408 /* The register has not been written yet. */
6409 break;
6411 case 1:
6412 /* The register has been written via a predicate, assume we
6413 need a barrier (don't check for complementary regs). */
6414 need_barrier = 1;
6415 break;
6417 case 2:
6418 /* The register has been unconditionally written already. We
6419 need a barrier. */
6420 need_barrier = 1;
6421 break;
6423 default:
6424 gcc_unreachable ();
6428 return need_barrier;
6431 static int
6432 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6434 int regno = REGNO (reg);
6435 int n = REG_NREGS (reg);
6437 if (n == 1)
6438 return rws_access_regno (regno, flags, pred);
6439 else
6441 int need_barrier = 0;
6442 while (--n >= 0)
6443 need_barrier |= rws_access_regno (regno + n, flags, pred);
6444 return need_barrier;
6448 /* Examine X, which is a SET rtx, and update the register flags
6449    stored in *PFLAGS.  */
6451 static void
6452 update_set_flags (rtx x, struct reg_flags *pflags)
6454 rtx src = SET_SRC (x);
6456 switch (GET_CODE (src))
6458 case CALL:
6459 return;
6461 case IF_THEN_ELSE:
6462 /* There are four cases here:
6463 (1) The destination is (pc), in which case this is a branch,
6464 nothing here applies.
6465 (2) The destination is ar.lc, in which case this is a
6466 doloop_end_internal,
6467 (3) The destination is an fp register, in which case this is
6468 an fselect instruction.
6469 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6470 this is a check load.
6471 In all cases, nothing we do in this function applies. */
6472 return;
6474 default:
6475 if (COMPARISON_P (src)
6476 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6477 /* Set pflags->is_fp to 1 so that we know we're dealing
6478 with a floating point comparison when processing the
6479 destination of the SET. */
6480 pflags->is_fp = 1;
6482 /* Discover if this is a parallel comparison. We only handle
6483 and.orcm and or.andcm at present, since we must retain a
6484 strict inverse on the predicate pair. */
6485 else if (GET_CODE (src) == AND)
6486 pflags->is_and = 1;
6487 else if (GET_CODE (src) == IOR)
6488 pflags->is_or = 1;
6490 break;
6494 /* Subroutine of rtx_needs_barrier; this function determines whether the
6495 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6496 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6497 for this insn. */
6499 static int
6500 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6502 int need_barrier = 0;
6503 rtx dst;
6504 rtx src = SET_SRC (x);
6506 if (GET_CODE (src) == CALL)
6507     /* We don't need to worry about the result registers that
6508        get written by a subroutine call.  */
6509 return rtx_needs_barrier (src, flags, pred);
6510 else if (SET_DEST (x) == pc_rtx)
6512 /* X is a conditional branch. */
6513 /* ??? This seems redundant, as the caller sets this bit for
6514 all JUMP_INSNs. */
6515 if (!ia64_spec_check_src_p (src))
6516 flags.is_branch = 1;
6517 return rtx_needs_barrier (src, flags, pred);
6520 if (ia64_spec_check_src_p (src))
6521     /* Avoid checking one register twice (in the condition
6522        and in the 'then' section) for the ldc pattern.  */
6524 gcc_assert (REG_P (XEXP (src, 2)));
6525 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6527 /* We process MEM below. */
6528 src = XEXP (src, 1);
6531 need_barrier |= rtx_needs_barrier (src, flags, pred);
6533 dst = SET_DEST (x);
6534 if (GET_CODE (dst) == ZERO_EXTRACT)
6536 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6537 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6539 return need_barrier;
6542 /* Handle an access to rtx X of type FLAGS using predicate register
6543 PRED. Return 1 if this access creates a dependency with an earlier
6544 instruction in the same group. */
6546 static int
6547 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6549 int i, j;
6550 int is_complemented = 0;
6551 int need_barrier = 0;
6552 const char *format_ptr;
6553 struct reg_flags new_flags;
6554 rtx cond;
6556 if (! x)
6557 return 0;
6559 new_flags = flags;
6561 switch (GET_CODE (x))
6563 case SET:
6564 update_set_flags (x, &new_flags);
6565 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6566 if (GET_CODE (SET_SRC (x)) != CALL)
6568 new_flags.is_write = 1;
6569 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6571 break;
6573 case CALL:
6574 new_flags.is_write = 0;
6575 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6577 /* Avoid multiple register writes, in case this is a pattern with
6578 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6579 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6581 new_flags.is_write = 1;
6582 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6583 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6584 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6586 break;
6588 case COND_EXEC:
6589 /* X is a predicated instruction. */
6591 cond = COND_EXEC_TEST (x);
6592 gcc_assert (!pred);
6593 need_barrier = rtx_needs_barrier (cond, flags, 0);
6595 if (GET_CODE (cond) == EQ)
6596 is_complemented = 1;
6597 cond = XEXP (cond, 0);
6598 gcc_assert (GET_CODE (cond) == REG
6599 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6600 pred = REGNO (cond);
6601 if (is_complemented)
6602 ++pred;
6604 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6605 return need_barrier;
6607 case CLOBBER:
6608 case USE:
6609 /* Clobber & use are for earlier compiler-phases only. */
6610 break;
6612 case ASM_OPERANDS:
6613 case ASM_INPUT:
6614 /* We always emit stop bits for traditional asms. We emit stop bits
6615 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6616 if (GET_CODE (x) != ASM_OPERANDS
6617 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6619 /* Avoid writing the register multiple times if we have multiple
6620 asm outputs. This avoids a failure in rws_access_reg. */
6621 if (! rws_insn_test (REG_VOLATILE))
6623 new_flags.is_write = 1;
6624 rws_access_regno (REG_VOLATILE, new_flags, pred);
6626 return 1;
6629 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6630 	 We cannot just fall through here since then we would be confused
6631 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6632 	 a traditional asm, unlike its normal usage.  */
6634 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6635 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6636 need_barrier = 1;
6637 break;
6639 case PARALLEL:
6640 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6642 rtx pat = XVECEXP (x, 0, i);
6643 switch (GET_CODE (pat))
6645 case SET:
6646 update_set_flags (pat, &new_flags);
6647 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6648 break;
6650 case USE:
6651 case CALL:
6652 case ASM_OPERANDS:
6653 case ASM_INPUT:
6654 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6655 break;
6657 case CLOBBER:
6658 if (REG_P (XEXP (pat, 0))
6659 && extract_asm_operands (x) != NULL_RTX
6660 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6662 new_flags.is_write = 1;
6663 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6664 new_flags, pred);
6665 new_flags = flags;
6667 break;
6669 case RETURN:
6670 break;
6672 default:
6673 gcc_unreachable ();
6676 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6678 rtx pat = XVECEXP (x, 0, i);
6679 if (GET_CODE (pat) == SET)
6681 if (GET_CODE (SET_SRC (pat)) != CALL)
6683 new_flags.is_write = 1;
6684 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6685 pred);
6688 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6689 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6691 break;
6693 case SUBREG:
6694 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6695 break;
6696 case REG:
6697 if (REGNO (x) == AR_UNAT_REGNUM)
6699 for (i = 0; i < 64; ++i)
6700 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6702 else
6703 need_barrier = rws_access_reg (x, flags, pred);
6704 break;
6706 case MEM:
6707 /* Find the regs used in memory address computation. */
6708 new_flags.is_write = 0;
6709 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6710 break;
6712 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6713 case SYMBOL_REF: case LABEL_REF: case CONST:
6714 break;
6716 /* Operators with side-effects. */
6717 case POST_INC: case POST_DEC:
6718 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6720 new_flags.is_write = 0;
6721 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6722 new_flags.is_write = 1;
6723 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6724 break;
6726 case POST_MODIFY:
6727 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6729 new_flags.is_write = 0;
6730 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6731 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6732 new_flags.is_write = 1;
6733 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6734 break;
6736 /* Handle common unary and binary ops for efficiency. */
6737 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6738 case MOD: case UDIV: case UMOD: case AND: case IOR:
6739 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6740 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6741 case NE: case EQ: case GE: case GT: case LE:
6742 case LT: case GEU: case GTU: case LEU: case LTU:
6743 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6744 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6745 break;
6747 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6748 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6749 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6750 case SQRT: case FFS: case POPCOUNT:
6751 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6752 break;
6754 case VEC_SELECT:
6755 /* VEC_SELECT's second argument is a PARALLEL with integers that
6756 describe the elements selected. On ia64, those integers are
6757 always constants. Avoid walking the PARALLEL so that we don't
6758 get confused with "normal" parallels and then die. */
6759 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6760 break;
6762 case UNSPEC:
6763 switch (XINT (x, 1))
6765 case UNSPEC_LTOFF_DTPMOD:
6766 case UNSPEC_LTOFF_DTPREL:
6767 case UNSPEC_DTPREL:
6768 case UNSPEC_LTOFF_TPREL:
6769 case UNSPEC_TPREL:
6770 case UNSPEC_PRED_REL_MUTEX:
6771 case UNSPEC_PIC_CALL:
6772 case UNSPEC_MF:
6773 case UNSPEC_FETCHADD_ACQ:
6774 case UNSPEC_FETCHADD_REL:
6775 case UNSPEC_BSP_VALUE:
6776 case UNSPEC_FLUSHRS:
6777 case UNSPEC_BUNDLE_SELECTOR:
6778 break;
6780 case UNSPEC_GR_SPILL:
6781 case UNSPEC_GR_RESTORE:
6783 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6784 HOST_WIDE_INT bit = (offset >> 3) & 63;
6786 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6787 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6788 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6789 new_flags, pred);
6790 break;
6793 case UNSPEC_FR_SPILL:
6794 case UNSPEC_FR_RESTORE:
6795 case UNSPEC_GETF_EXP:
6796 case UNSPEC_SETF_EXP:
6797 case UNSPEC_ADDP4:
6798 case UNSPEC_FR_SQRT_RECIP_APPROX:
6799 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6800 case UNSPEC_LDA:
6801 case UNSPEC_LDS:
6802 case UNSPEC_LDS_A:
6803 case UNSPEC_LDSA:
6804 case UNSPEC_CHKACLR:
6805 case UNSPEC_CHKS:
6806 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6807 break;
6809 case UNSPEC_FR_RECIP_APPROX:
6810 case UNSPEC_SHRP:
6811 case UNSPEC_COPYSIGN:
6812 case UNSPEC_FR_RECIP_APPROX_RES:
6813 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6814 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6815 break;
6817 case UNSPEC_CMPXCHG_ACQ:
6818 case UNSPEC_CMPXCHG_REL:
6819 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6820 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6821 break;
6823 default:
6824 gcc_unreachable ();
6826 break;
6828 case UNSPEC_VOLATILE:
6829 switch (XINT (x, 1))
6831 case UNSPECV_ALLOC:
6832 /* Alloc must always be the first instruction of a group.
6833 We force this by always returning true. */
6834 /* ??? We might get better scheduling if we explicitly check for
6835 input/local/output register dependencies, and modify the
6836 scheduler so that alloc is always reordered to the start of
6837 the current group. We could then eliminate all of the
6838 first_instruction code. */
6839 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6841 new_flags.is_write = 1;
6842 rws_access_regno (REG_AR_CFM, new_flags, pred);
6843 return 1;
6845 case UNSPECV_SET_BSP:
6846 case UNSPECV_PROBE_STACK_RANGE:
6847 need_barrier = 1;
6848 break;
6850 case UNSPECV_BLOCKAGE:
6851 case UNSPECV_INSN_GROUP_BARRIER:
6852 case UNSPECV_BREAK:
6853 case UNSPECV_PSAC_ALL:
6854 case UNSPECV_PSAC_NORMAL:
6855 return 0;
6857 case UNSPECV_PROBE_STACK_ADDRESS:
6858 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6859 break;
6861 default:
6862 gcc_unreachable ();
6864 break;
6866 case RETURN:
6867 new_flags.is_write = 0;
6868 need_barrier = rws_access_regno (REG_RP, flags, pred);
6869 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6871 new_flags.is_write = 1;
6872 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6873 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6874 break;
6876 default:
6877 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6878 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6879 switch (format_ptr[i])
6881 case '0': /* unused field */
6882 case 'i': /* integer */
6883 case 'n': /* note */
6884 case 'w': /* wide integer */
6885 case 's': /* pointer to string */
6886 case 'S': /* optional pointer to string */
6887 break;
6889 case 'e':
6890 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6891 need_barrier = 1;
6892 break;
6894 case 'E':
6895 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6896 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6897 need_barrier = 1;
6898 break;
6900 default:
6901 gcc_unreachable ();
6903 break;
6905 return need_barrier;
6908 /* Clear out the state for group_barrier_needed at the start of a
6909 sequence of insns. */
6911 static void
6912 init_insn_group_barriers (void)
6914 memset (rws_sum, 0, sizeof (rws_sum));
6915 first_instruction = 1;
6918 /* Given the current state, determine whether a group barrier (a stop bit) is
6919 necessary before INSN. Return nonzero if so. This modifies the state to
6920 include the effects of INSN as a side-effect. */
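/* For example (illustrative): once an insn that writes r14 unconditionally
   has been recorded here, passing a second insn that also writes r14 makes
   this function return nonzero, and the caller is expected to emit a stop
   bit before that second insn.  */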
6922 static int
6923 group_barrier_needed (rtx_insn *insn)
6925 rtx pat;
6926 int need_barrier = 0;
6927 struct reg_flags flags;
6929 memset (&flags, 0, sizeof (flags));
6930 switch (GET_CODE (insn))
6932 case NOTE:
6933 case DEBUG_INSN:
6934 break;
6936 case BARRIER:
6937 /* A barrier doesn't imply an instruction group boundary. */
6938 break;
6940 case CODE_LABEL:
6941 memset (rws_insn, 0, sizeof (rws_insn));
6942 return 1;
6944 case CALL_INSN:
6945 flags.is_branch = 1;
6946 flags.is_sibcall = SIBLING_CALL_P (insn);
6947 memset (rws_insn, 0, sizeof (rws_insn));
6949 /* Don't bundle a call following another call. */
6950 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6952 need_barrier = 1;
6953 break;
6956 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6957 break;
6959 case JUMP_INSN:
6960 if (!ia64_spec_check_p (insn))
6961 flags.is_branch = 1;
6963 /* Don't bundle a jump following a call. */
6964 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6966 need_barrier = 1;
6967 break;
6969 /* FALLTHRU */
6971 case INSN:
6972 if (GET_CODE (PATTERN (insn)) == USE
6973 || GET_CODE (PATTERN (insn)) == CLOBBER)
6974 /* Don't care about USE and CLOBBER "insns"---those are used to
6975 indicate to the optimizer that it shouldn't get rid of
6976 certain operations. */
6977 break;
6979 pat = PATTERN (insn);
6981 /* Ug. Hack hacks hacked elsewhere. */
6982 switch (recog_memoized (insn))
6984 /* We play dependency tricks with the epilogue in order
6985 to get proper schedules. Undo this for dv analysis. */
6986 case CODE_FOR_epilogue_deallocate_stack:
6987 case CODE_FOR_prologue_allocate_stack:
6988 pat = XVECEXP (pat, 0, 0);
6989 break;
6991 /* The pattern we use for br.cloop confuses the code above.
6992 The second element of the vector is representative. */
6993 case CODE_FOR_doloop_end_internal:
6994 pat = XVECEXP (pat, 0, 1);
6995 break;
6997 /* Doesn't generate code. */
6998 case CODE_FOR_pred_rel_mutex:
6999 case CODE_FOR_prologue_use:
7000 return 0;
7002 default:
7003 break;
7006 memset (rws_insn, 0, sizeof (rws_insn));
7007 need_barrier = rtx_needs_barrier (pat, flags, 0);
7009 /* Check to see if the previous instruction was a volatile
7010 asm. */
7011 if (! need_barrier)
7012 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
7014 break;
7016 default:
7017 gcc_unreachable ();
7020 if (first_instruction && important_for_bundling_p (insn))
7022 need_barrier = 0;
7023 first_instruction = 0;
7026 return need_barrier;
7029 /* Like group_barrier_needed, but do not clobber the current state. */
7031 static int
7032 safe_group_barrier_needed (rtx_insn *insn)
7034 int saved_first_instruction;
7035 int t;
7037 saved_first_instruction = first_instruction;
7038 in_safe_group_barrier = 1;
7040 t = group_barrier_needed (insn);
7042 first_instruction = saved_first_instruction;
7043 in_safe_group_barrier = 0;
7045 return t;
7048 /* Scan the current function and insert stop bits as necessary to
7049 eliminate dependencies. This function assumes that a final
7050 instruction scheduling pass has been run which has already
7051 inserted most of the necessary stop bits. This function only
7052 inserts new ones at basic block boundaries, since these are
7053 invisible to the scheduler. */
7055 static void
7056 emit_insn_group_barriers (FILE *dump)
7058 rtx_insn *insn;
7059 rtx_insn *last_label = 0;
7060 int insns_since_last_label = 0;
7062 init_insn_group_barriers ();
7064 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7066 if (LABEL_P (insn))
7068 if (insns_since_last_label)
7069 last_label = insn;
7070 insns_since_last_label = 0;
7072 else if (NOTE_P (insn)
7073 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7075 if (insns_since_last_label)
7076 last_label = insn;
7077 insns_since_last_label = 0;
7079 else if (NONJUMP_INSN_P (insn)
7080 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7081 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7083 init_insn_group_barriers ();
7084 last_label = 0;
7086 else if (NONDEBUG_INSN_P (insn))
7088 insns_since_last_label = 1;
7090 if (group_barrier_needed (insn))
7092 if (last_label)
7094 if (dump)
7095 fprintf (dump, "Emitting stop before label %d\n",
7096 INSN_UID (last_label));
7097 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7098 insn = last_label;
7100 init_insn_group_barriers ();
7101 last_label = 0;
7108 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7109 This function has to emit all necessary group barriers. */
7111 static void
7112 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7114 rtx_insn *insn;
7116 init_insn_group_barriers ();
7118 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7120 if (BARRIER_P (insn))
7122 rtx_insn *last = prev_active_insn (insn);
7124 if (! last)
7125 continue;
7126 if (JUMP_TABLE_DATA_P (last))
7127 last = prev_active_insn (last);
7128 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7129 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7131 init_insn_group_barriers ();
7133 else if (NONDEBUG_INSN_P (insn))
7135 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7136 init_insn_group_barriers ();
7137 else if (group_barrier_needed (insn))
7139 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7140 init_insn_group_barriers ();
7141 group_barrier_needed (insn);
7149 /* Instruction scheduling support. */
7151 #define NR_BUNDLES 10
7153 /* A list of names of all available bundles. */
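/* Each name spells out the syllable types of the bundle's three slots:
   m = memory, i = integer, f = floating point, b = branch; ".mlx" is an
   M slot followed by an L+X pair holding a long-immediate instruction.
   So ".mfi" is M+F+I, ".mmb" is M+M+B, and so on.  */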
7155 static const char *bundle_name [NR_BUNDLES] =
7157 ".mii",
7158 ".mmi",
7159 ".mfi",
7160 ".mmf",
7161 #if NR_BUNDLES == 10
7162 ".bbb",
7163 ".mbb",
7164 #endif
7165 ".mib",
7166 ".mmb",
7167 ".mfb",
7168 ".mlx"
7171 /* Nonzero if we should insert stop bits into the schedule. */
7173 int ia64_final_schedule = 0;
7175 /* Codes of the corresponding queried units: */
7177 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7178 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7180 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7181 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7183 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7185 /* The following variable value is an insn group barrier. */
7187 static rtx_insn *dfa_stop_insn;
7189 /* The following variable value is the last issued insn. */
7191 static rtx_insn *last_scheduled_insn;
7193 /* The following variable value is a pointer to a DFA state used as
7194    a temporary variable.  */
7196 static state_t temp_dfa_state = NULL;
7198 /* The following variable value is the DFA state after issuing the last
7199    insn.  */
7201 static state_t prev_cycle_state = NULL;
7203 /* The following array element values are TRUE if the corresponding
7204    insn requires a stop bit to be added before it.  */
7206 static char *stops_p = NULL;
7208 /* The following variable is used to set up the above-mentioned array.  */
7210 static int stop_before_p = 0;
7212 /* The following variable value is the length of the arrays `clocks' and
7213    `add_cycles'.  */
7215 static int clocks_length;
7217 /* The following variable value is the number of data speculations in progress.  */
7218 static int pending_data_specs = 0;
7220 /* Number of memory references in the current and the three future processor cycles.  */
7221 static char mem_ops_in_group[4];
7223 /* Number of the current processor cycle (from the scheduler's point of view).  */
7224 static int current_cycle;
7226 static rtx ia64_single_set (rtx_insn *);
7227 static void ia64_emit_insn_before (rtx, rtx_insn *);
7229 /* Map a bundle number to its pseudo-op. */
7231 const char *
7232 get_bundle_name (int b)
7234 return bundle_name[b];
7238 /* Return the maximum number of instructions a cpu can issue. */
7240 static int
7241 ia64_issue_rate (void)
7243 return 6;
7246 /* Helper function - like single_set, but look inside COND_EXEC. */
7248 static rtx
7249 ia64_single_set (rtx_insn *insn)
7251 rtx x = PATTERN (insn), ret;
7252 if (GET_CODE (x) == COND_EXEC)
7253 x = COND_EXEC_CODE (x);
7254 if (GET_CODE (x) == SET)
7255 return x;
7257   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7258      Although they are not classical single sets, the second set is there just
7259      to protect the first from being moved past FP-relative stack accesses.  */
7260 switch (recog_memoized (insn))
7262 case CODE_FOR_prologue_allocate_stack:
7263 case CODE_FOR_prologue_allocate_stack_pr:
7264 case CODE_FOR_epilogue_deallocate_stack:
7265 case CODE_FOR_epilogue_deallocate_stack_pr:
7266 ret = XVECEXP (x, 0, 0);
7267 break;
7269 default:
7270 ret = single_set_2 (insn, x);
7271 break;
7274 return ret;
7277 /* Adjust the cost of a scheduling dependency.
7278    Return the new cost of the dependency of type DEP_TYPE of INSN on DEP_INSN.
7279    COST is the current cost, DW is the dependency weakness.  */
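/* For example, per the checks below: a true dependence of a branch or
   sibcall on a store gets cost 0, since the apparent dependence only comes
   from the MEM in the call address; likewise, output dependencies in which
   either insn is a store are given cost 0.  */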
7280 static int
7281 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7282 int cost, dw_t dw)
7284 enum reg_note dep_type = (enum reg_note) dep_type1;
7285 enum attr_itanium_class dep_class;
7286 enum attr_itanium_class insn_class;
7288 insn_class = ia64_safe_itanium_class (insn);
7289 dep_class = ia64_safe_itanium_class (dep_insn);
7291   /* Treat true memory dependencies separately.  Ignore apparent true
7292      dependence between a store and a call (a call has a MEM inside a SYMBOL_REF).  */
7293 if (dep_type == REG_DEP_TRUE
7294 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7295 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7296 return 0;
7298 if (dw == MIN_DEP_WEAK)
7299 /* Store and load are likely to alias, use higher cost to avoid stall. */
7300 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7301 else if (dw > MIN_DEP_WEAK)
7303 /* Store and load are less likely to alias. */
7304 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7305 /* Assume there will be no cache conflict for floating-point data.
7306 For integer data, L1 conflict penalty is huge (17 cycles), so we
7307 never assume it will not cause a conflict. */
7308 return 0;
7309 else
7310 return cost;
7313 if (dep_type != REG_DEP_OUTPUT)
7314 return cost;
7316 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7317 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7318 return 0;
7320 return cost;
7323 /* Like emit_insn_before, but skip cycle_display notes.
7324 ??? When cycle display notes are implemented, update this. */
7326 static void
7327 ia64_emit_insn_before (rtx insn, rtx_insn *before)
7329 emit_insn_before (insn, before);
7332 /* The following function marks insns that produce addresses for load
7333    and store insns.  Such insns will be placed into M slots because this
7334    decreases latency for Itanium 1 (see function
7335    `ia64_produce_address_p' and the DFA descriptions).  */
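/* For example (illustrative): an IALU add whose result feeds the address
   of a later load or store, as detected by the ia64_ld_address_bypass_p /
   ia64_st_address_bypass_p checks below, gets insn->call set to 1, which
   the DFA description uses to steer it into an M slot.  */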
7337 static void
7338 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7340 rtx_insn *insn, *next, *next_tail;
7342 /* Before reload, which_alternative is not set, which means that
7343 ia64_safe_itanium_class will produce wrong results for (at least)
7344 move instructions. */
7345 if (!reload_completed)
7346 return;
7348 next_tail = NEXT_INSN (tail);
7349 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7350 if (INSN_P (insn))
7351 insn->call = 0;
7352 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7353 if (INSN_P (insn)
7354 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7356 sd_iterator_def sd_it;
7357 dep_t dep;
7358 bool has_mem_op_consumer_p = false;
7360 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7362 enum attr_itanium_class c;
7364 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7365 continue;
7367 next = DEP_CON (dep);
7368 c = ia64_safe_itanium_class (next);
7369 if ((c == ITANIUM_CLASS_ST
7370 || c == ITANIUM_CLASS_STF)
7371 && ia64_st_address_bypass_p (insn, next))
7373 has_mem_op_consumer_p = true;
7374 break;
7376 else if ((c == ITANIUM_CLASS_LD
7377 || c == ITANIUM_CLASS_FLD
7378 || c == ITANIUM_CLASS_FLDP)
7379 && ia64_ld_address_bypass_p (insn, next))
7381 has_mem_op_consumer_p = true;
7382 break;
7386 insn->call = has_mem_op_consumer_p;
7390 /* We're beginning a new block. Initialize data structures as necessary. */
7392 static void
7393 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7394 int sched_verbose ATTRIBUTE_UNUSED,
7395 int max_ready ATTRIBUTE_UNUSED)
7397 if (flag_checking && !sel_sched_p () && reload_completed)
7399 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7400 insn != current_sched_info->next_tail;
7401 insn = NEXT_INSN (insn))
7402 gcc_assert (!SCHED_GROUP_P (insn));
7404 last_scheduled_insn = NULL;
7405 init_insn_group_barriers ();
7407 current_cycle = 0;
7408 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7411 /* We're beginning a scheduling pass. Check assertion. */
7413 static void
7414 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7415 int sched_verbose ATTRIBUTE_UNUSED,
7416 int max_ready ATTRIBUTE_UNUSED)
7418 gcc_assert (pending_data_specs == 0);
7421 /* Scheduling pass is now finished. Free/reset static variable. */
7422 static void
7423 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7424 int sched_verbose ATTRIBUTE_UNUSED)
7426 gcc_assert (pending_data_specs == 0);
7429 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7430 speculation check), FALSE otherwise. */
7431 static bool
7432 is_load_p (rtx_insn *insn)
7434 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7436 return
7437 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7438 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7441 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
7442    array (taking into account the 3-cycle cache-reference postponing for
7443    stores; see the Intel Itanium 2 Reference Manual for Software Development
7444    and Optimization, section 6.7.3.1).  */
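/* For example, a load issued on cycle N is counted in
   mem_ops_in_group[N % 4], while a store issued on cycle N is charged
   three cycles later, in mem_ops_in_group[(N + 3) % 4].  */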
7445 static void
7446 record_memory_reference (rtx_insn *insn)
7448 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7450 switch (insn_class) {
7451 case ITANIUM_CLASS_FLD:
7452 case ITANIUM_CLASS_LD:
7453 mem_ops_in_group[current_cycle % 4]++;
7454 break;
7455 case ITANIUM_CLASS_STF:
7456 case ITANIUM_CLASS_ST:
7457 mem_ops_in_group[(current_cycle + 3) % 4]++;
7458 break;
7459 default:;
7463 /* We are about to begin issuing insns for this clock cycle.
7464    Override the default sort algorithm to better slot instructions.  */
7466 static int
7467 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7468 int *pn_ready, int clock_var,
7469 int reorder_type)
7471 int n_asms;
7472 int n_ready = *pn_ready;
7473 rtx_insn **e_ready = ready + n_ready;
7474 rtx_insn **insnp;
7476 if (sched_verbose)
7477 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7479 if (reorder_type == 0)
7481 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7482 n_asms = 0;
7483 for (insnp = ready; insnp < e_ready; insnp++)
7484 if (insnp < e_ready)
7486 rtx_insn *insn = *insnp;
7487 enum attr_type t = ia64_safe_type (insn);
7488 if (t == TYPE_UNKNOWN)
7490 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7491 || asm_noperands (PATTERN (insn)) >= 0)
7493 rtx_insn *lowest = ready[n_asms];
7494 ready[n_asms] = insn;
7495 *insnp = lowest;
7496 n_asms++;
7498 else
7500 rtx_insn *highest = ready[n_ready - 1];
7501 ready[n_ready - 1] = insn;
7502 *insnp = highest;
7503 return 1;
7508 if (n_asms < n_ready)
7510 /* Some normal insns to process. Skip the asms. */
7511 ready += n_asms;
7512 n_ready -= n_asms;
7514 else if (n_ready > 0)
7515 return 1;
7518 if (ia64_final_schedule)
7520 int deleted = 0;
7521 int nr_need_stop = 0;
7523 for (insnp = ready; insnp < e_ready; insnp++)
7524 if (safe_group_barrier_needed (*insnp))
7525 nr_need_stop++;
7527 if (reorder_type == 1 && n_ready == nr_need_stop)
7528 return 0;
7529 if (reorder_type == 0)
7530 return 1;
7531 insnp = e_ready;
7532 /* Move down everything that needs a stop bit, preserving
7533 relative order. */
7534 while (insnp-- > ready + deleted)
7535 while (insnp >= ready + deleted)
7537 rtx_insn *insn = *insnp;
7538 if (! safe_group_barrier_needed (insn))
7539 break;
7540 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7541 *ready = insn;
7542 deleted++;
7544 n_ready -= deleted;
7545 ready += deleted;
7548 current_cycle = clock_var;
7549 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7551 int moved = 0;
7553 insnp = e_ready;
7554 /* Move down loads/stores, preserving relative order. */
7555 while (insnp-- > ready + moved)
7556 while (insnp >= ready + moved)
7558 rtx_insn *insn = *insnp;
7559 if (! is_load_p (insn))
7560 break;
7561 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7562 *ready = insn;
7563 moved++;
7565 n_ready -= moved;
7566 ready += moved;
7569 return 1;
7572 /* We are about to begin issuing insns for this clock cycle.  Override
7573    the default sort algorithm to better slot instructions.  */
7575 static int
7576 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7577 int *pn_ready, int clock_var)
7579 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7580 pn_ready, clock_var, 0);
7583 /* Like ia64_sched_reorder, but called after issuing each insn.
7584 Override the default sort algorithm to better slot instructions. */
7586 static int
7587 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7588 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7589 int *pn_ready, int clock_var)
7591 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7592 clock_var, 1);
7595 /* We are about to issue INSN. Return the number of insns left on the
7596 ready queue that can be issued this cycle. */
7598 static int
7599 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7600 int sched_verbose ATTRIBUTE_UNUSED,
7601 rtx_insn *insn,
7602 int can_issue_more ATTRIBUTE_UNUSED)
7604 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7605     /* Modulo scheduling does not extend h_i_d when emitting
7606        new instructions.  Don't use h_i_d if we don't have to.  */
7608 if (DONE_SPEC (insn) & BEGIN_DATA)
7609 pending_data_specs++;
7610 if (CHECK_SPEC (insn) & BEGIN_DATA)
7611 pending_data_specs--;
7614 if (DEBUG_INSN_P (insn))
7615 return 1;
7617 last_scheduled_insn = insn;
7618 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7619 if (reload_completed)
7621 int needed = group_barrier_needed (insn);
7623 gcc_assert (!needed);
7624 if (CALL_P (insn))
7625 init_insn_group_barriers ();
7626 stops_p [INSN_UID (insn)] = stop_before_p;
7627 stop_before_p = 0;
7629 record_memory_reference (insn);
7631 return 1;
7634 /* We are choosing an insn from the ready queue. Return zero if INSN
7635 can be chosen. */
7637 static int
7638 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7640 gcc_assert (insn && INSN_P (insn));
7642 /* The size of the ALAT is 32. Since we perform conservative
7643 data speculation, we keep the ALAT half empty. */
7644 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7645 return ready_index == 0 ? -1 : 1;
7647 if (ready_index == 0)
7648 return 0;
7650 if ((!reload_completed
7651 || !safe_group_barrier_needed (insn))
7652 && (!mflag_sched_mem_insns_hard_limit
7653 || !is_load_p (insn)
7654 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7655 return 0;
7657 return 1;
7660 /* The following variable holds the pseudo-insn used by the DFA insn
7661 scheduler to change the DFA state when the simulated clock is
7662 increased. */
7664 static rtx_insn *dfa_pre_cycle_insn;
7666 /* Returns 1 when a meaningful insn was scheduled between the last group
7667 barrier and LAST. */
7668 static int
7669 scheduled_good_insn (rtx_insn *last)
7671 if (last && recog_memoized (last) >= 0)
7672 return 1;
7674 for ( ;
7675 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7676 && !stops_p[INSN_UID (last)];
7677 last = PREV_INSN (last))
7678 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7679 the ebb we're scheduling. */
7680 if (INSN_P (last) && recog_memoized (last) >= 0)
7681 return 1;
7683 return 0;
7686 /* We are about to begin issuing INSN. Return nonzero if we cannot
7687 issue it on the given cycle CLOCK; clear *SORT_P when the ready queue
7688 should not be re-sorted at the start of the next cycle. */
7690 static int
7691 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7692 int clock, int *sort_p)
7694 gcc_assert (insn && INSN_P (insn));
7696 if (DEBUG_INSN_P (insn))
7697 return 0;
7699 /* When a group barrier is needed for insn, last_scheduled_insn
7700 should be set. */
7701 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7702 || last_scheduled_insn);
7704 if ((reload_completed
7705 && (safe_group_barrier_needed (insn)
7706 || (mflag_sched_stop_bits_after_every_cycle
7707 && last_clock != clock
7708 && last_scheduled_insn
7709 && scheduled_good_insn (last_scheduled_insn))))
7710 || (last_scheduled_insn
7711 && (CALL_P (last_scheduled_insn)
7712 || unknown_for_bundling_p (last_scheduled_insn))))
7714 init_insn_group_barriers ();
7716 if (verbose && dump)
7717 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7718 last_clock == clock ? " + cycle advance" : "");
7720 stop_before_p = 1;
7721 current_cycle = clock;
7722 mem_ops_in_group[current_cycle % 4] = 0;
7724 if (last_clock == clock)
7726 state_transition (curr_state, dfa_stop_insn);
7727 if (TARGET_EARLY_STOP_BITS)
7728 *sort_p = (last_scheduled_insn == NULL_RTX
7729 || ! CALL_P (last_scheduled_insn));
7730 else
7731 *sort_p = 0;
7732 return 1;
7735 if (last_scheduled_insn)
7737 if (unknown_for_bundling_p (last_scheduled_insn))
7738 state_reset (curr_state);
7739 else
7741 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7742 state_transition (curr_state, dfa_stop_insn);
7743 state_transition (curr_state, dfa_pre_cycle_insn);
7744 state_transition (curr_state, NULL);
7748 return 0;
7751 /* Implement targetm.sched.h_i_d_extended hook.
7752 Extend internal data structures. */
7753 static void
7754 ia64_h_i_d_extended (void)
7756 if (stops_p != NULL)
7758 int new_clocks_length = get_max_uid () * 3 / 2;
7759 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7760 clocks_length = new_clocks_length;
7765 /* This structure describes the data used by the backend to guide scheduling.
7766 When the current scheduling point is switched, this data should be saved
7767 and restored later, if the scheduler returns to this point. */
7768 struct _ia64_sched_context
7770 state_t prev_cycle_state;
7771 rtx_insn *last_scheduled_insn;
7772 struct reg_write_state rws_sum[NUM_REGS];
7773 struct reg_write_state rws_insn[NUM_REGS];
7774 int first_instruction;
7775 int pending_data_specs;
7776 int current_cycle;
7777 char mem_ops_in_group[4];
7779 typedef struct _ia64_sched_context *ia64_sched_context_t;
7781 /* Allocates a scheduling context. */
7782 static void *
7783 ia64_alloc_sched_context (void)
7785 return xmalloc (sizeof (struct _ia64_sched_context));
7788 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7789 the global context otherwise. */
7790 static void
7791 ia64_init_sched_context (void *_sc, bool clean_p)
7793 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7795 sc->prev_cycle_state = xmalloc (dfa_state_size);
7796 if (clean_p)
7798 state_reset (sc->prev_cycle_state);
7799 sc->last_scheduled_insn = NULL;
7800 memset (sc->rws_sum, 0, sizeof (rws_sum));
7801 memset (sc->rws_insn, 0, sizeof (rws_insn));
7802 sc->first_instruction = 1;
7803 sc->pending_data_specs = 0;
7804 sc->current_cycle = 0;
7805 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7807 else
7809 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7810 sc->last_scheduled_insn = last_scheduled_insn;
7811 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7812 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7813 sc->first_instruction = first_instruction;
7814 sc->pending_data_specs = pending_data_specs;
7815 sc->current_cycle = current_cycle;
7816 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7820 /* Sets the global scheduling context to the one pointed to by _SC. */
7821 static void
7822 ia64_set_sched_context (void *_sc)
7824 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7826 gcc_assert (sc != NULL);
7828 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7829 last_scheduled_insn = sc->last_scheduled_insn;
7830 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7831 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7832 first_instruction = sc->first_instruction;
7833 pending_data_specs = sc->pending_data_specs;
7834 current_cycle = sc->current_cycle;
7835 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7838 /* Clears the data in the _SC scheduling context. */
7839 static void
7840 ia64_clear_sched_context (void *_sc)
7842 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7844 free (sc->prev_cycle_state);
7845 sc->prev_cycle_state = NULL;
7848 /* Frees the _SC scheduling context. */
7849 static void
7850 ia64_free_sched_context (void *_sc)
7852 gcc_assert (_sc != NULL);
7854 free (_sc);
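/* The five hooks above (alloc/init/set/clear/free) let the scheduler --
   notably the selective scheduler -- save the global scheduling state
   described by struct _ia64_sched_context (prev_cycle_state,
   last_scheduled_insn, the rws_* tables, pending_data_specs and the
   per-cycle memory-op counters) and restore it when it returns to a
   previously visited scheduling point.  */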
7857 typedef rtx (* gen_func_t) (rtx, rtx);
7859 /* Return a function that will generate a load of mode MODE_NO
7860 with speculation types TS. */
7861 static gen_func_t
7862 get_spec_load_gen_function (ds_t ts, int mode_no)
7864 static gen_func_t gen_ld_[] = {
7865 gen_movbi,
7866 gen_movqi_internal,
7867 gen_movhi_internal,
7868 gen_movsi_internal,
7869 gen_movdi_internal,
7870 gen_movsf_internal,
7871 gen_movdf_internal,
7872 gen_movxf_internal,
7873 gen_movti_internal,
7874 gen_zero_extendqidi2,
7875 gen_zero_extendhidi2,
7876 gen_zero_extendsidi2,
7879 static gen_func_t gen_ld_a[] = {
7880 gen_movbi_advanced,
7881 gen_movqi_advanced,
7882 gen_movhi_advanced,
7883 gen_movsi_advanced,
7884 gen_movdi_advanced,
7885 gen_movsf_advanced,
7886 gen_movdf_advanced,
7887 gen_movxf_advanced,
7888 gen_movti_advanced,
7889 gen_zero_extendqidi2_advanced,
7890 gen_zero_extendhidi2_advanced,
7891 gen_zero_extendsidi2_advanced,
7893 static gen_func_t gen_ld_s[] = {
7894 gen_movbi_speculative,
7895 gen_movqi_speculative,
7896 gen_movhi_speculative,
7897 gen_movsi_speculative,
7898 gen_movdi_speculative,
7899 gen_movsf_speculative,
7900 gen_movdf_speculative,
7901 gen_movxf_speculative,
7902 gen_movti_speculative,
7903 gen_zero_extendqidi2_speculative,
7904 gen_zero_extendhidi2_speculative,
7905 gen_zero_extendsidi2_speculative,
7907 static gen_func_t gen_ld_sa[] = {
7908 gen_movbi_speculative_advanced,
7909 gen_movqi_speculative_advanced,
7910 gen_movhi_speculative_advanced,
7911 gen_movsi_speculative_advanced,
7912 gen_movdi_speculative_advanced,
7913 gen_movsf_speculative_advanced,
7914 gen_movdf_speculative_advanced,
7915 gen_movxf_speculative_advanced,
7916 gen_movti_speculative_advanced,
7917 gen_zero_extendqidi2_speculative_advanced,
7918 gen_zero_extendhidi2_speculative_advanced,
7919 gen_zero_extendsidi2_speculative_advanced,
7921 static gen_func_t gen_ld_s_a[] = {
7922 gen_movbi_speculative_a,
7923 gen_movqi_speculative_a,
7924 gen_movhi_speculative_a,
7925 gen_movsi_speculative_a,
7926 gen_movdi_speculative_a,
7927 gen_movsf_speculative_a,
7928 gen_movdf_speculative_a,
7929 gen_movxf_speculative_a,
7930 gen_movti_speculative_a,
7931 gen_zero_extendqidi2_speculative_a,
7932 gen_zero_extendhidi2_speculative_a,
7933 gen_zero_extendsidi2_speculative_a,
7936 gen_func_t *gen_ld;
7938 if (ts & BEGIN_DATA)
7940 if (ts & BEGIN_CONTROL)
7941 gen_ld = gen_ld_sa;
7942 else
7943 gen_ld = gen_ld_a;
7945 else if (ts & BEGIN_CONTROL)
7947 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7948 || ia64_needs_block_p (ts))
7949 gen_ld = gen_ld_s;
7950 else
7951 gen_ld = gen_ld_s_a;
7953 else if (ts == 0)
7954 gen_ld = gen_ld_;
7955 else
7956 gcc_unreachable ();
7958 return gen_ld[mode_no];
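/* The tables above mirror the IA-64 speculative load forms: the plain
   move patterns when no speculation is requested, "_advanced" (ld.a)
   for data speculation (BEGIN_DATA), "_speculative" (ld.s) for control
   speculation (BEGIN_CONTROL), and "_speculative_advanced" (ld.sa) when
   both are requested; the "_speculative_a" variants cover a
   control-speculative load that will later be checked with ld.c rather
   than chk.s (see get_spec_check_gen_function below).  As a purely
   illustrative sketch (operands invented):

       ld8.a  r14 = [r32]   // data-speculative (advanced) load
       ld8.s  r14 = [r32]   // control-speculative load
       ld8.sa r14 = [r32]   // both kinds of speculation  */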
7961 /* Constants that help map 'machine_mode' to int. */
7962 enum SPEC_MODES
7964 SPEC_MODE_INVALID = -1,
7965 SPEC_MODE_FIRST = 0,
7966 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7967 SPEC_MODE_FOR_EXTEND_LAST = 3,
7968 SPEC_MODE_LAST = 8
7971 enum
7973 /* Offset to reach ZERO_EXTEND patterns. */
7974 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7977 /* Return the index of MODE. */
7978 static int
7979 ia64_mode_to_int (machine_mode mode)
7981 switch (mode)
7983 case E_BImode: return 0; /* SPEC_MODE_FIRST */
7984 case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7985 case E_HImode: return 2;
7986 case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7987 case E_DImode: return 4;
7988 case E_SFmode: return 5;
7989 case E_DFmode: return 6;
7990 case E_XFmode: return 7;
7991 case E_TImode:
7992 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7993 mentioned in itanium[12].md. Predicate fp_register_operand also
7994 needs to be defined. Bottom line: better disable for now. */
7995 return SPEC_MODE_INVALID;
7996 default: return SPEC_MODE_INVALID;
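/* These indices line up with the gen_* tables in the speculation
   helpers: entries 0-8 are the plain move patterns (BImode ... TImode),
   and for a load feeding a ZERO_EXTEND, SPEC_GEN_EXTEND_OFFSET
   (SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 == 8) is added, so
   e.g. a zero-extended QImode load maps to 1 + 8 == 9, the
   gen_zero_extendqidi2* slot.  */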
8000 /* Provide information about speculation capabilities. */
8001 static void
8002 ia64_set_sched_flags (spec_info_t spec_info)
8004 unsigned int *flags = &(current_sched_info->flags);
8006 if (*flags & SCHED_RGN
8007 || *flags & SCHED_EBB
8008 || *flags & SEL_SCHED)
8010 int mask = 0;
8012 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
8013 || (mflag_sched_ar_data_spec && reload_completed))
8015 mask |= BEGIN_DATA;
8017 if (!sel_sched_p ()
8018 && ((mflag_sched_br_in_data_spec && !reload_completed)
8019 || (mflag_sched_ar_in_data_spec && reload_completed)))
8020 mask |= BE_IN_DATA;
8023 if (mflag_sched_control_spec
8024 && (!sel_sched_p ()
8025 || reload_completed))
8027 mask |= BEGIN_CONTROL;
8029 if (!sel_sched_p () && mflag_sched_in_control_spec)
8030 mask |= BE_IN_CONTROL;
8033 spec_info->mask = mask;
8035 if (mask)
8037 *flags |= USE_DEPS_LIST | DO_SPECULATION;
8039 if (mask & BE_IN_SPEC)
8040 *flags |= NEW_BBS;
8042 spec_info->flags = 0;
8044 if ((mask & CONTROL_SPEC)
8045 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8046 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8048 if (sched_verbose >= 1)
8049 spec_info->dump = sched_dump;
8050 else
8051 spec_info->dump = 0;
8053 if (mflag_sched_count_spec_in_critical_path)
8054 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8057 else
8058 spec_info->mask = 0;
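/* The mask bits follow the scheduler's ds_t conventions: BEGIN_DATA and
   BEGIN_CONTROL allow loads to be transformed into data- or
   control-speculative forms, while the BE_IN_* bits presumably permit
   other insns to be moved into the resulting speculative regions; the
   -msched-* options tested above simply gate each capability before and
   after reload.  */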
8061 /* If INSN is an appropriate load, return the index of its mode.
8062 Return -1 otherwise. */
8063 static int
8064 get_mode_no_for_insn (rtx_insn *insn)
8066 rtx reg, mem, mode_rtx;
8067 int mode_no;
8068 bool extend_p;
8070 extract_insn_cached (insn);
8072 /* We use WHICH_ALTERNATIVE only after reload. This will
8073 guarantee that reload won't touch a speculative insn. */
8075 if (recog_data.n_operands != 2)
8076 return -1;
8078 reg = recog_data.operand[0];
8079 mem = recog_data.operand[1];
8081 /* We should use MEM's mode since REG's mode in the presence of
8082 ZERO_EXTEND will always be DImode. */
8083 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8084 /* Process non-speculative ld. */
8086 if (!reload_completed)
8088 /* Do not speculate into regs like ar.lc. */
8089 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8090 return -1;
8092 if (!MEM_P (mem))
8093 return -1;
8096 rtx mem_reg = XEXP (mem, 0);
8098 if (!REG_P (mem_reg))
8099 return -1;
8102 mode_rtx = mem;
8104 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8106 gcc_assert (REG_P (reg) && MEM_P (mem));
8107 mode_rtx = mem;
8109 else
8110 return -1;
8112 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8113 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8114 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8115 /* Process speculative ld or ld.c. */
8117 gcc_assert (REG_P (reg) && MEM_P (mem));
8118 mode_rtx = mem;
8120 else
8122 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8124 if (attr_class == ITANIUM_CLASS_CHK_A
8125 || attr_class == ITANIUM_CLASS_CHK_S_I
8126 || attr_class == ITANIUM_CLASS_CHK_S_F)
8127 /* Process chk. */
8128 mode_rtx = reg;
8129 else
8130 return -1;
8133 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8135 if (mode_no == SPEC_MODE_INVALID)
8136 return -1;
8138 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8140 if (extend_p)
8142 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8143 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8144 return -1;
8146 mode_no += SPEC_GEN_EXTEND_OFFSET;
8149 return mode_no;
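/* For example, a DImode load ("ld8 r14 = [r32]", operands invented)
   yields mode_no 4, while an SImode load whose result feeds a
   ZERO_EXTEND to DImode yields 3 + SPEC_GEN_EXTEND_OFFSET == 11, which
   selects the gen_zero_extendsidi2* entries in the tables used by
   get_spec_load_gen_function and get_spec_check_gen_function.  */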
8152 /* If X is an unspec part of a speculative load, return its code.
8153 Return -1 otherwise. */
8154 static int
8155 get_spec_unspec_code (const_rtx x)
8157 if (GET_CODE (x) != UNSPEC)
8158 return -1;
8161 int code;
8163 code = XINT (x, 1);
8165 switch (code)
8167 case UNSPEC_LDA:
8168 case UNSPEC_LDS:
8169 case UNSPEC_LDS_A:
8170 case UNSPEC_LDSA:
8171 return code;
8173 default:
8174 return -1;
8179 /* Implement skip_rtx_p hook. */
8180 static bool
8181 ia64_skip_rtx_p (const_rtx x)
8183 return get_spec_unspec_code (x) != -1;
8186 /* If INSN is a speculative load, return its UNSPEC code.
8187 Return -1 otherwise. */
8188 static int
8189 get_insn_spec_code (const_rtx insn)
8191 rtx pat, reg, mem;
8193 pat = PATTERN (insn);
8195 if (GET_CODE (pat) == COND_EXEC)
8196 pat = COND_EXEC_CODE (pat);
8198 if (GET_CODE (pat) != SET)
8199 return -1;
8201 reg = SET_DEST (pat);
8202 if (!REG_P (reg))
8203 return -1;
8205 mem = SET_SRC (pat);
8206 if (GET_CODE (mem) == ZERO_EXTEND)
8207 mem = XEXP (mem, 0);
8209 return get_spec_unspec_code (mem);
8212 /* If INSN is a speculative load, return a ds with the speculation types.
8213 Otherwise [if INSN is a normal instruction] return 0. */
8214 static ds_t
8215 ia64_get_insn_spec_ds (rtx_insn *insn)
8217 int code = get_insn_spec_code (insn);
8219 switch (code)
8221 case UNSPEC_LDA:
8222 return BEGIN_DATA;
8224 case UNSPEC_LDS:
8225 case UNSPEC_LDS_A:
8226 return BEGIN_CONTROL;
8228 case UNSPEC_LDSA:
8229 return BEGIN_DATA | BEGIN_CONTROL;
8231 default:
8232 return 0;
8236 /* If INSN is a speculative load return a ds with the speculation types that
8237 will be checked.
8238 Otherwise [if INSN is a normal instruction] return 0. */
8239 static ds_t
8240 ia64_get_insn_checked_ds (rtx_insn *insn)
8242 int code = get_insn_spec_code (insn);
8244 switch (code)
8246 case UNSPEC_LDA:
8247 return BEGIN_DATA | BEGIN_CONTROL;
8249 case UNSPEC_LDS:
8250 return BEGIN_CONTROL;
8252 case UNSPEC_LDS_A:
8253 case UNSPEC_LDSA:
8254 return BEGIN_DATA | BEGIN_CONTROL;
8256 default:
8257 return 0;
8261 /* Return a speculative load pattern for INSN with speculation types TS
8262 and machine mode index MODE_NO; an index greater than SPEC_MODE_LAST
8263 selects the corresponding ZERO_EXTEND variant. The operands are taken
8264 from the most recent insn extraction (recog_data). */
8265 static rtx
8266 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8268 rtx pat, new_pat;
8269 gen_func_t gen_load;
8271 gen_load = get_spec_load_gen_function (ts, mode_no);
8273 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8274 copy_rtx (recog_data.operand[1]));
8276 pat = PATTERN (insn);
8277 if (GET_CODE (pat) == COND_EXEC)
8278 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8279 new_pat);
8281 return new_pat;
8284 static bool
8285 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8286 ds_t ds ATTRIBUTE_UNUSED)
8288 return false;
8291 /* Implement targetm.sched.speculate_insn hook.
8292 Check if the INSN can be TS speculative.
8293 If 'no' - return -1.
8294 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8295 If current pattern of the INSN already provides TS speculation,
8296 return 0. */
8297 static int
8298 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8300 int mode_no;
8301 int res;
8303 gcc_assert (!(ts & ~SPECULATIVE));
8305 if (ia64_spec_check_p (insn))
8306 return -1;
8308 if ((ts & BE_IN_SPEC)
8309 && !insn_can_be_in_speculative_p (insn, ts))
8310 return -1;
8312 mode_no = get_mode_no_for_insn (insn);
8314 if (mode_no != SPEC_MODE_INVALID)
8316 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8317 res = 0;
8318 else
8320 res = 1;
8321 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8324 else
8325 res = -1;
8327 return res;
8330 /* Return a function that will generate a check for speculation TS with mode
8331 MODE_NO.
8332 If a simple check is needed, pass true for SIMPLE_CHECK_P.
8333 If a clearing check is needed, pass true for CLEARING_CHECK_P. */
8334 static gen_func_t
8335 get_spec_check_gen_function (ds_t ts, int mode_no,
8336 bool simple_check_p, bool clearing_check_p)
8338 static gen_func_t gen_ld_c_clr[] = {
8339 gen_movbi_clr,
8340 gen_movqi_clr,
8341 gen_movhi_clr,
8342 gen_movsi_clr,
8343 gen_movdi_clr,
8344 gen_movsf_clr,
8345 gen_movdf_clr,
8346 gen_movxf_clr,
8347 gen_movti_clr,
8348 gen_zero_extendqidi2_clr,
8349 gen_zero_extendhidi2_clr,
8350 gen_zero_extendsidi2_clr,
8352 static gen_func_t gen_ld_c_nc[] = {
8353 gen_movbi_nc,
8354 gen_movqi_nc,
8355 gen_movhi_nc,
8356 gen_movsi_nc,
8357 gen_movdi_nc,
8358 gen_movsf_nc,
8359 gen_movdf_nc,
8360 gen_movxf_nc,
8361 gen_movti_nc,
8362 gen_zero_extendqidi2_nc,
8363 gen_zero_extendhidi2_nc,
8364 gen_zero_extendsidi2_nc,
8366 static gen_func_t gen_chk_a_clr[] = {
8367 gen_advanced_load_check_clr_bi,
8368 gen_advanced_load_check_clr_qi,
8369 gen_advanced_load_check_clr_hi,
8370 gen_advanced_load_check_clr_si,
8371 gen_advanced_load_check_clr_di,
8372 gen_advanced_load_check_clr_sf,
8373 gen_advanced_load_check_clr_df,
8374 gen_advanced_load_check_clr_xf,
8375 gen_advanced_load_check_clr_ti,
8376 gen_advanced_load_check_clr_di,
8377 gen_advanced_load_check_clr_di,
8378 gen_advanced_load_check_clr_di,
8380 static gen_func_t gen_chk_a_nc[] = {
8381 gen_advanced_load_check_nc_bi,
8382 gen_advanced_load_check_nc_qi,
8383 gen_advanced_load_check_nc_hi,
8384 gen_advanced_load_check_nc_si,
8385 gen_advanced_load_check_nc_di,
8386 gen_advanced_load_check_nc_sf,
8387 gen_advanced_load_check_nc_df,
8388 gen_advanced_load_check_nc_xf,
8389 gen_advanced_load_check_nc_ti,
8390 gen_advanced_load_check_nc_di,
8391 gen_advanced_load_check_nc_di,
8392 gen_advanced_load_check_nc_di,
8394 static gen_func_t gen_chk_s[] = {
8395 gen_speculation_check_bi,
8396 gen_speculation_check_qi,
8397 gen_speculation_check_hi,
8398 gen_speculation_check_si,
8399 gen_speculation_check_di,
8400 gen_speculation_check_sf,
8401 gen_speculation_check_df,
8402 gen_speculation_check_xf,
8403 gen_speculation_check_ti,
8404 gen_speculation_check_di,
8405 gen_speculation_check_di,
8406 gen_speculation_check_di,
8409 gen_func_t *gen_check;
8411 if (ts & BEGIN_DATA)
8413 /* We don't need recovery because even if this is ld.sa,
8414 the ALAT entry will be allocated only if the NAT bit is set to zero.
8415 So it is enough to use ld.c here. */
8417 if (simple_check_p)
8419 gcc_assert (mflag_sched_spec_ldc);
8421 if (clearing_check_p)
8422 gen_check = gen_ld_c_clr;
8423 else
8424 gen_check = gen_ld_c_nc;
8426 else
8428 if (clearing_check_p)
8429 gen_check = gen_chk_a_clr;
8430 else
8431 gen_check = gen_chk_a_nc;
8434 else if (ts & BEGIN_CONTROL)
8436 if (simple_check_p)
8437 /* We might want to use ld.sa -> ld.c instead of
8438 ld.s -> chk.s. */
8440 gcc_assert (!ia64_needs_block_p (ts));
8442 if (clearing_check_p)
8443 gen_check = gen_ld_c_clr;
8444 else
8445 gen_check = gen_ld_c_nc;
8447 else
8449 gen_check = gen_chk_s;
8452 else
8453 gcc_unreachable ();
8455 gcc_assert (mode_no >= 0);
8456 return gen_check[mode_no];
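/* The tables above reflect the two IA-64 checking styles: a "simple"
   check re-issues the load as ld.c.clr or ld.c.nc (gen_ld_c_*), while
   the "branchy" form emits chk.a.clr / chk.a.nc for advanced loads and
   chk.s for control speculation (gen_chk_*), branching to recovery code
   if the speculation failed; the clearing variants also invalidate the
   matching ALAT entry.  */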
8459 /* Return nonzero if speculation types TS need a branchy recovery check. */
8460 static bool
8461 ia64_needs_block_p (ds_t ts)
8463 if (ts & BEGIN_DATA)
8464 return !mflag_sched_spec_ldc;
8466 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8468 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8471 /* Generate (or regenerate) a recovery check for INSN. */
8472 static rtx
8473 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8475 rtx op1, pat, check_pat;
8476 gen_func_t gen_check;
8477 int mode_no;
8479 mode_no = get_mode_no_for_insn (insn);
8480 gcc_assert (mode_no >= 0);
8482 if (label)
8483 op1 = label;
8484 else
8486 gcc_assert (!ia64_needs_block_p (ds));
8487 op1 = copy_rtx (recog_data.operand[1]);
8490 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8491 true);
8493 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8495 pat = PATTERN (insn);
8496 if (GET_CODE (pat) == COND_EXEC)
8497 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8498 check_pat);
8500 return check_pat;
8503 /* Return nonzero if X is a branchy recovery check. */
8504 static int
8505 ia64_spec_check_p (rtx x)
8507 x = PATTERN (x);
8508 if (GET_CODE (x) == COND_EXEC)
8509 x = COND_EXEC_CODE (x);
8510 if (GET_CODE (x) == SET)
8511 return ia64_spec_check_src_p (SET_SRC (x));
8512 return 0;
8515 /* Return nonzero if SRC belongs to a recovery check. */
8516 static int
8517 ia64_spec_check_src_p (rtx src)
8519 if (GET_CODE (src) == IF_THEN_ELSE)
8521 rtx t;
8523 t = XEXP (src, 0);
8524 if (GET_CODE (t) == NE)
8526 t = XEXP (t, 0);
8528 if (GET_CODE (t) == UNSPEC)
8530 int code;
8532 code = XINT (t, 1);
8534 if (code == UNSPEC_LDCCLR
8535 || code == UNSPEC_LDCNC
8536 || code == UNSPEC_CHKACLR
8537 || code == UNSPEC_CHKANC
8538 || code == UNSPEC_CHKS)
8540 gcc_assert (code != 0);
8541 return code;
8546 return 0;
8550 /* The following page contains abstract data `bundle states' which are
8551 used for bundling insns (inserting nops and generating templates). */
8553 /* The following describes state of insn bundling. */
8555 struct bundle_state
8557 /* Unique bundle state number to identify them in the debugging
8558 output */
8559 int unique_num;
8560 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8561 /* number of nops before and after the insn */
8562 short before_nops_num, after_nops_num;
8563 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8564 insn) */
8565 int cost; /* cost of the state in cycles */
8566 int accumulated_insns_num; /* number of all previous insns including
8567 nops. L is considered as 2 insns */
8568 int branch_deviation; /* deviation of previous branches from 3rd slots */
8569 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8570 struct bundle_state *next; /* next state with the same insn_num */
8571 struct bundle_state *originator; /* originator (previous insn state) */
8572 /* All bundle states are in the following chain. */
8573 struct bundle_state *allocated_states_chain;
8574 /* The DFA State after issuing the insn and the nops. */
8575 state_t dfa_state;
8578 /* The following maps an insn number to the corresponding bundle state. */
8580 static struct bundle_state **index_to_bundle_states;
8582 /* The unique number of next bundle state. */
8584 static int bundle_states_num;
8586 /* All allocated bundle states are in the following chain. */
8588 static struct bundle_state *allocated_bundle_states_chain;
8590 /* All allocated but not used bundle states are in the following
8591 chain. */
8593 static struct bundle_state *free_bundle_state_chain;
8596 /* The following function returns a free bundle state. */
8598 static struct bundle_state *
8599 get_free_bundle_state (void)
8601 struct bundle_state *result;
8603 if (free_bundle_state_chain != NULL)
8605 result = free_bundle_state_chain;
8606 free_bundle_state_chain = result->next;
8608 else
8610 result = XNEW (struct bundle_state);
8611 result->dfa_state = xmalloc (dfa_state_size);
8612 result->allocated_states_chain = allocated_bundle_states_chain;
8613 allocated_bundle_states_chain = result;
8615 result->unique_num = bundle_states_num++;
8616 return result;
8620 /* The following function frees the given bundle state. */
8622 static void
8623 free_bundle_state (struct bundle_state *state)
8625 state->next = free_bundle_state_chain;
8626 free_bundle_state_chain = state;
8629 /* Start work with abstract data `bundle states'. */
8631 static void
8632 initiate_bundle_states (void)
8634 bundle_states_num = 0;
8635 free_bundle_state_chain = NULL;
8636 allocated_bundle_states_chain = NULL;
8639 /* Finish work with abstract data `bundle states'. */
8641 static void
8642 finish_bundle_states (void)
8644 struct bundle_state *curr_state, *next_state;
8646 for (curr_state = allocated_bundle_states_chain;
8647 curr_state != NULL;
8648 curr_state = next_state)
8650 next_state = curr_state->allocated_states_chain;
8651 free (curr_state->dfa_state);
8652 free (curr_state);
8656 /* Hashtable helpers. */
8658 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8660 static inline hashval_t hash (const bundle_state *);
8661 static inline bool equal (const bundle_state *, const bundle_state *);
8664 /* The function returns hash of BUNDLE_STATE. */
8666 inline hashval_t
8667 bundle_state_hasher::hash (const bundle_state *state)
8669 unsigned result, i;
8671 for (result = i = 0; i < dfa_state_size; i++)
8672 result += (((unsigned char *) state->dfa_state) [i]
8673 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8674 return result + state->insn_num;
8677 /* The function returns nonzero if the bundle state keys are equal. */
8679 inline bool
8680 bundle_state_hasher::equal (const bundle_state *state1,
8681 const bundle_state *state2)
8683 return (state1->insn_num == state2->insn_num
8684 && memcmp (state1->dfa_state, state2->dfa_state,
8685 dfa_state_size) == 0);
8688 /* Hash table of the bundle states. The key is dfa_state and insn_num
8689 of the bundle states. */
8691 static hash_table<bundle_state_hasher> *bundle_state_table;
8693 /* The function inserts the BUNDLE_STATE into the hash table. The
8694 function returns nonzero if the bundle has been inserted into the
8695 table. The table contains the best bundle state with a given key. */
8697 static int
8698 insert_bundle_state (struct bundle_state *bundle_state)
8700 struct bundle_state **entry_ptr;
8702 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8703 if (*entry_ptr == NULL)
8705 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8706 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8707 *entry_ptr = bundle_state;
8708 return TRUE;
8710 else if (bundle_state->cost < (*entry_ptr)->cost
8711 || (bundle_state->cost == (*entry_ptr)->cost
8712 && ((*entry_ptr)->accumulated_insns_num
8713 > bundle_state->accumulated_insns_num
8714 || ((*entry_ptr)->accumulated_insns_num
8715 == bundle_state->accumulated_insns_num
8716 && ((*entry_ptr)->branch_deviation
8717 > bundle_state->branch_deviation
8718 || ((*entry_ptr)->branch_deviation
8719 == bundle_state->branch_deviation
8720 && (*entry_ptr)->middle_bundle_stops
8721 > bundle_state->middle_bundle_stops))))))
8724 struct bundle_state temp;
8726 temp = **entry_ptr;
8727 **entry_ptr = *bundle_state;
8728 (*entry_ptr)->next = temp.next;
8729 *bundle_state = temp;
8731 return FALSE;
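/* Thus, of two states with the same (insn_num, dfa_state) key, the
   better one is kept, compared by cost, then accumulated insns (fewer
   nops), then branch deviation, then stops in the middle of bundles;
   when the new state wins, the contents are swapped so that the state
   already linked into index_to_bundle_states keeps its place in that
   list.  */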
8734 /* Start work with the hash table. */
8736 static void
8737 initiate_bundle_state_table (void)
8739 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8742 /* Finish work with the hash table. */
8744 static void
8745 finish_bundle_state_table (void)
8747 delete bundle_state_table;
8748 bundle_state_table = NULL;
8753 /* The following variable is an insn `nop' used to check bundle states
8754 with different numbers of inserted nops. */
8756 static rtx_insn *ia64_nop;
8758 /* The following function tries to issue NOPS_NUM nops for the current
8759 state without advancing the processor cycle. If it fails, the
8760 function returns FALSE and frees the current state. */
8762 static int
8763 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8765 int i;
8767 for (i = 0; i < nops_num; i++)
8768 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8770 free_bundle_state (curr_state);
8771 return FALSE;
8773 return TRUE;
8776 /* The following function tries to issue INSN for the current
8777 state without advancing the processor cycle. If it fails, the
8778 function returns FALSE and frees the current state. */
8780 static int
8781 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8783 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8785 free_bundle_state (curr_state);
8786 return FALSE;
8788 return TRUE;
8791 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8792 starting with ORIGINATOR without advancing the processor cycle. If
8793 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8794 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8795 If successful, the function creates a new bundle state and
8796 inserts it into the hash table and into `index_to_bundle_states'. */
8798 static void
8799 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8800 rtx_insn *insn, int try_bundle_end_p,
8801 int only_bundle_end_p)
8803 struct bundle_state *curr_state;
8805 curr_state = get_free_bundle_state ();
8806 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8807 curr_state->insn = insn;
8808 curr_state->insn_num = originator->insn_num + 1;
8809 curr_state->cost = originator->cost;
8810 curr_state->originator = originator;
8811 curr_state->before_nops_num = before_nops_num;
8812 curr_state->after_nops_num = 0;
8813 curr_state->accumulated_insns_num
8814 = originator->accumulated_insns_num + before_nops_num;
8815 curr_state->branch_deviation = originator->branch_deviation;
8816 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8817 gcc_assert (insn);
8818 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8820 gcc_assert (GET_MODE (insn) != TImode);
8821 if (!try_issue_nops (curr_state, before_nops_num))
8822 return;
8823 if (!try_issue_insn (curr_state, insn))
8824 return;
8825 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8826 if (curr_state->accumulated_insns_num % 3 != 0)
8827 curr_state->middle_bundle_stops++;
8828 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8829 && curr_state->accumulated_insns_num % 3 != 0)
8831 free_bundle_state (curr_state);
8832 return;
8835 else if (GET_MODE (insn) != TImode)
8837 if (!try_issue_nops (curr_state, before_nops_num))
8838 return;
8839 if (!try_issue_insn (curr_state, insn))
8840 return;
8841 curr_state->accumulated_insns_num++;
8842 gcc_assert (!unknown_for_bundling_p (insn));
8844 if (ia64_safe_type (insn) == TYPE_L)
8845 curr_state->accumulated_insns_num++;
8847 else
8849 /* If this is an insn that must be first in a group, then don't allow
8850 nops to be emitted before it. Currently, alloc is the only such
8851 supported instruction. */
8852 /* ??? The bundling automatons should handle this for us, but they do
8853 not yet have support for the first_insn attribute. */
8854 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8856 free_bundle_state (curr_state);
8857 return;
8860 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8861 state_transition (curr_state->dfa_state, NULL);
8862 curr_state->cost++;
8863 if (!try_issue_nops (curr_state, before_nops_num))
8864 return;
8865 if (!try_issue_insn (curr_state, insn))
8866 return;
8867 curr_state->accumulated_insns_num++;
8868 if (unknown_for_bundling_p (insn))
8870 /* Finish bundle containing asm insn. */
8871 curr_state->after_nops_num
8872 = 3 - curr_state->accumulated_insns_num % 3;
8873 curr_state->accumulated_insns_num
8874 += 3 - curr_state->accumulated_insns_num % 3;
8876 else if (ia64_safe_type (insn) == TYPE_L)
8877 curr_state->accumulated_insns_num++;
8879 if (ia64_safe_type (insn) == TYPE_B)
8880 curr_state->branch_deviation
8881 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8882 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8884 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8886 state_t dfa_state;
8887 struct bundle_state *curr_state1;
8888 struct bundle_state *allocated_states_chain;
8890 curr_state1 = get_free_bundle_state ();
8891 dfa_state = curr_state1->dfa_state;
8892 allocated_states_chain = curr_state1->allocated_states_chain;
8893 *curr_state1 = *curr_state;
8894 curr_state1->dfa_state = dfa_state;
8895 curr_state1->allocated_states_chain = allocated_states_chain;
8896 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8897 dfa_state_size);
8898 curr_state = curr_state1;
8900 if (!try_issue_nops (curr_state,
8901 3 - curr_state->accumulated_insns_num % 3))
8902 return;
8903 curr_state->after_nops_num
8904 = 3 - curr_state->accumulated_insns_num % 3;
8905 curr_state->accumulated_insns_num
8906 += 3 - curr_state->accumulated_insns_num % 3;
8908 if (!insert_bundle_state (curr_state))
8909 free_bundle_state (curr_state);
8910 return;
8913 /* The following function returns the position in the two-bundle window
8914 for the given STATE. */
8916 static int
8917 get_max_pos (state_t state)
8919 if (cpu_unit_reservation_p (state, pos_6))
8920 return 6;
8921 else if (cpu_unit_reservation_p (state, pos_5))
8922 return 5;
8923 else if (cpu_unit_reservation_p (state, pos_4))
8924 return 4;
8925 else if (cpu_unit_reservation_p (state, pos_3))
8926 return 3;
8927 else if (cpu_unit_reservation_p (state, pos_2))
8928 return 2;
8929 else if (cpu_unit_reservation_p (state, pos_1))
8930 return 1;
8931 else
8932 return 0;
8935 /* The function returns the code of a possible template for the given
8936 position and state. The function should be called only with 2 values of
8937 position equal to 3 or 6. We avoid generating F NOPs by putting
8938 templates containing F insns at the end of the template search
8939 because of an undocumented anomaly in McKinley-derived cores which can
8940 cause stalls if an F-unit insn (including a NOP) is issued within a
8941 six-cycle window after reading certain application registers (such
8942 as ar.bsp). Furthermore, power considerations also argue against
8943 the use of F-unit instructions unless they're really needed. */
8945 static int
8946 get_template (state_t state, int pos)
8948 switch (pos)
8950 case 3:
8951 if (cpu_unit_reservation_p (state, _0mmi_))
8952 return 1;
8953 else if (cpu_unit_reservation_p (state, _0mii_))
8954 return 0;
8955 else if (cpu_unit_reservation_p (state, _0mmb_))
8956 return 7;
8957 else if (cpu_unit_reservation_p (state, _0mib_))
8958 return 6;
8959 else if (cpu_unit_reservation_p (state, _0mbb_))
8960 return 5;
8961 else if (cpu_unit_reservation_p (state, _0bbb_))
8962 return 4;
8963 else if (cpu_unit_reservation_p (state, _0mmf_))
8964 return 3;
8965 else if (cpu_unit_reservation_p (state, _0mfi_))
8966 return 2;
8967 else if (cpu_unit_reservation_p (state, _0mfb_))
8968 return 8;
8969 else if (cpu_unit_reservation_p (state, _0mlx_))
8970 return 9;
8971 else
8972 gcc_unreachable ();
8973 case 6:
8974 if (cpu_unit_reservation_p (state, _1mmi_))
8975 return 1;
8976 else if (cpu_unit_reservation_p (state, _1mii_))
8977 return 0;
8978 else if (cpu_unit_reservation_p (state, _1mmb_))
8979 return 7;
8980 else if (cpu_unit_reservation_p (state, _1mib_))
8981 return 6;
8982 else if (cpu_unit_reservation_p (state, _1mbb_))
8983 return 5;
8984 else if (cpu_unit_reservation_p (state, _1bbb_))
8985 return 4;
8986 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8987 return 3;
8988 else if (cpu_unit_reservation_p (state, _1mfi_))
8989 return 2;
8990 else if (cpu_unit_reservation_p (state, _1mfb_))
8991 return 8;
8992 else if (cpu_unit_reservation_p (state, _1mlx_))
8993 return 9;
8994 else
8995 gcc_unreachable ();
8996 default:
8997 gcc_unreachable ();
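/* Judging from the _0mii_/_0mmi_/... unit names tested above, the
   returned codes appear to correspond to the bundle templates
   0 .mii, 1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb,
   8 .mfb and 9 .mlx.  */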
9001 /* True when INSN is important for bundling. */
9003 static bool
9004 important_for_bundling_p (rtx_insn *insn)
9006 return (INSN_P (insn)
9007 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9008 && GET_CODE (PATTERN (insn)) != USE
9009 && GET_CODE (PATTERN (insn)) != CLOBBER);
9012 /* The following function returns the first insn important for insn
9013 bundling at or after INSN and before TAIL, or NULL if there is none. */
9015 static rtx_insn *
9016 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9018 for (; insn && insn != tail; insn = NEXT_INSN (insn))
9019 if (important_for_bundling_p (insn))
9020 return insn;
9021 return NULL;
9024 /* True when INSN is unknown, but important, for bundling. */
9026 static bool
9027 unknown_for_bundling_p (rtx_insn *insn)
9029 return (INSN_P (insn)
9030 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9031 && GET_CODE (PATTERN (insn)) != USE
9032 && GET_CODE (PATTERN (insn)) != CLOBBER);
9035 /* Add a bundle selector TEMPLATE0 before INSN. */
9037 static void
9038 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9040 rtx b = gen_bundle_selector (GEN_INT (template0));
9042 ia64_emit_insn_before (b, insn);
9043 #if NR_BUNDLES == 10
9044 if ((template0 == 4 || template0 == 5)
9045 && ia64_except_unwind_info (&global_options) == UI_TARGET)
9047 int i;
9048 rtx note = NULL_RTX;
9050 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9051 first or second slot. If it is and has REG_EH_NOTE set, copy it
9052 to following nops, as br.call sets rp to the address of following
9053 bundle and therefore an EH region end must be on a bundle
9054 boundary. */
9055 insn = PREV_INSN (insn);
9056 for (i = 0; i < 3; i++)
9059 insn = next_active_insn (insn);
9060 while (NONJUMP_INSN_P (insn)
9061 && get_attr_empty (insn) == EMPTY_YES);
9062 if (CALL_P (insn))
9063 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9064 else if (note)
9066 int code;
9068 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9069 || code == CODE_FOR_nop_b);
9070 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9071 note = NULL_RTX;
9072 else
9073 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9077 #endif
9080 /* The following function does insn bundling. Bundling means
9081 inserting templates and nop insns to fit insn groups into permitted
9082 templates. Instruction scheduling uses NDFA (non-deterministic
9083 finite automata) encoding information about the templates and the
9084 inserted nops. Nondeterminism of the automata permits following
9085 all possible insn sequences very quickly.
9087 Unfortunately it is not possible to get information about inserting
9088 nop insns and used templates from the automata states. The
9089 automaton only says that we can issue an insn, possibly inserting
9090 some nops before it and using some template. Therefore insn
9091 bundling in this function is implemented by using DFA
9092 (deterministic finite automata). We follow all possible insn
9093 sequences by inserting 0-2 nops (that is what the NDFA describes for
9094 insn scheduling) before/after each insn being bundled. We know the
9095 start of simulated processor cycle from insn scheduling (insn
9096 starting a new cycle has TImode).
9098 A simple implementation of insn bundling would create an enormous
9099 number of possible insn sequences satisfying the information about new
9100 cycle ticks taken from the insn scheduling. To make the algorithm
9101 practical we use dynamic programming. Each decision (about
9102 inserting nops and implicitly about previous decisions) is described
9103 by structure bundle_state (see above). If we generate the same
9104 bundle state (key is automaton state after issuing the insns and
9105 nops for it), we reuse the already generated one. As a consequence we
9106 reject some decisions which cannot improve the solution and
9107 reduce memory for the algorithm.
9109 When we reach the end of EBB (extended basic block), we choose the
9110 best sequence and then, moving back in EBB, insert templates for
9111 the best alternative. The templates are taken from querying
9112 automaton state for each insn in chosen bundle states.
9114 So the algorithm makes two (forward and backward) passes through
9115 EBB. */
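/* A purely illustrative sketch (insns and registers invented): a cycle
   with one memory, one integer and one branch insn could end up as a
   single bundle such as

       { .mib
         ld8 r14 = [r32]
         add r15 = r16, r17
         (p6) br.cond.dptk .L1 ;;
       }

   while cycles with fewer insns get nop.m/nop.i/nop.f/nop.b fillers
   chosen so that one of the permitted templates matches.  */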
9117 static void
9118 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9120 struct bundle_state *curr_state, *next_state, *best_state;
9121 rtx_insn *insn, *next_insn;
9122 int insn_num;
9123 int i, bundle_end_p, only_bundle_end_p, asm_p;
9124 int pos = 0, max_pos, template0, template1;
9125 rtx_insn *b;
9126 enum attr_type type;
9128 insn_num = 0;
9129 /* Count insns in the EBB. */
9130 for (insn = NEXT_INSN (prev_head_insn);
9131 insn && insn != tail;
9132 insn = NEXT_INSN (insn))
9133 if (INSN_P (insn))
9134 insn_num++;
9135 if (insn_num == 0)
9136 return;
9137 bundling_p = 1;
9138 dfa_clean_insn_cache ();
9139 initiate_bundle_state_table ();
9140 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9141 /* First (forward) pass -- generation of bundle states. */
9142 curr_state = get_free_bundle_state ();
9143 curr_state->insn = NULL;
9144 curr_state->before_nops_num = 0;
9145 curr_state->after_nops_num = 0;
9146 curr_state->insn_num = 0;
9147 curr_state->cost = 0;
9148 curr_state->accumulated_insns_num = 0;
9149 curr_state->branch_deviation = 0;
9150 curr_state->middle_bundle_stops = 0;
9151 curr_state->next = NULL;
9152 curr_state->originator = NULL;
9153 state_reset (curr_state->dfa_state);
9154 index_to_bundle_states [0] = curr_state;
9155 insn_num = 0;
9156 /* Shift cycle mark if it is put on insn which could be ignored. */
9157 for (insn = NEXT_INSN (prev_head_insn);
9158 insn != tail;
9159 insn = NEXT_INSN (insn))
9160 if (INSN_P (insn)
9161 && !important_for_bundling_p (insn)
9162 && GET_MODE (insn) == TImode)
9164 PUT_MODE (insn, VOIDmode);
9165 for (next_insn = NEXT_INSN (insn);
9166 next_insn != tail;
9167 next_insn = NEXT_INSN (next_insn))
9168 if (important_for_bundling_p (next_insn)
9169 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9171 PUT_MODE (next_insn, TImode);
9172 break;
9175 /* Forward pass: generation of bundle states. */
9176 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9177 insn != NULL_RTX;
9178 insn = next_insn)
9180 gcc_assert (important_for_bundling_p (insn));
9181 type = ia64_safe_type (insn);
9182 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9183 insn_num++;
9184 index_to_bundle_states [insn_num] = NULL;
9185 for (curr_state = index_to_bundle_states [insn_num - 1];
9186 curr_state != NULL;
9187 curr_state = next_state)
9189 pos = curr_state->accumulated_insns_num % 3;
9190 next_state = curr_state->next;
9191 /* We must fill up the current bundle in order to start a
9192 subsequent asm insn in a new bundle. An asm insn is always
9193 placed in a separate bundle. */
9194 only_bundle_end_p
9195 = (next_insn != NULL_RTX
9196 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9197 && unknown_for_bundling_p (next_insn));
9198 /* We may fill up the current bundle if it is the cycle end
9199 without a group barrier. */
9200 bundle_end_p
9201 = (only_bundle_end_p || next_insn == NULL_RTX
9202 || (GET_MODE (next_insn) == TImode
9203 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9204 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9205 || type == TYPE_S)
9206 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9207 only_bundle_end_p);
9208 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9209 only_bundle_end_p);
9210 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9211 only_bundle_end_p);
9213 gcc_assert (index_to_bundle_states [insn_num]);
9214 for (curr_state = index_to_bundle_states [insn_num];
9215 curr_state != NULL;
9216 curr_state = curr_state->next)
9217 if (verbose >= 2 && dump)
9219 /* This structure is taken from generated code of the
9220 pipeline hazard recognizer (see file insn-attrtab.c).
9221 Please don't forget to change the structure if a new
9222 automaton is added to .md file. */
9223 struct DFA_chip
9225 unsigned short one_automaton_state;
9226 unsigned short oneb_automaton_state;
9227 unsigned short two_automaton_state;
9228 unsigned short twob_automaton_state;
9231 fprintf
9232 (dump,
9233 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9234 curr_state->unique_num,
9235 (curr_state->originator == NULL
9236 ? -1 : curr_state->originator->unique_num),
9237 curr_state->cost,
9238 curr_state->before_nops_num, curr_state->after_nops_num,
9239 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9240 curr_state->middle_bundle_stops,
9241 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9242 INSN_UID (insn));
9246 /* We should find a solution because the 2nd insn scheduling has
9247 found one. */
9248 gcc_assert (index_to_bundle_states [insn_num]);
9249 /* Find a state corresponding to the best insn sequence. */
9250 best_state = NULL;
9251 for (curr_state = index_to_bundle_states [insn_num];
9252 curr_state != NULL;
9253 curr_state = curr_state->next)
9254 /* We are only looking at states whose last bundle is completely
9255 filled. First we prefer insn sequences with minimal cost,
9256 then with minimal inserted nops, and finally with branch insns
9257 placed in the 3rd slots. */
9258 if (curr_state->accumulated_insns_num % 3 == 0
9259 && (best_state == NULL || best_state->cost > curr_state->cost
9260 || (best_state->cost == curr_state->cost
9261 && (curr_state->accumulated_insns_num
9262 < best_state->accumulated_insns_num
9263 || (curr_state->accumulated_insns_num
9264 == best_state->accumulated_insns_num
9265 && (curr_state->branch_deviation
9266 < best_state->branch_deviation
9267 || (curr_state->branch_deviation
9268 == best_state->branch_deviation
9269 && curr_state->middle_bundle_stops
9270 < best_state->middle_bundle_stops)))))))
9271 best_state = curr_state;
9272 /* Second (backward) pass: adding nops and templates. */
9273 gcc_assert (best_state);
9274 insn_num = best_state->before_nops_num;
9275 template0 = template1 = -1;
9276 for (curr_state = best_state;
9277 curr_state->originator != NULL;
9278 curr_state = curr_state->originator)
9280 insn = curr_state->insn;
9281 asm_p = unknown_for_bundling_p (insn);
9282 insn_num++;
9283 if (verbose >= 2 && dump)
9285 struct DFA_chip
9287 unsigned short one_automaton_state;
9288 unsigned short oneb_automaton_state;
9289 unsigned short two_automaton_state;
9290 unsigned short twob_automaton_state;
9293 fprintf
9294 (dump,
9295 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9296 curr_state->unique_num,
9297 (curr_state->originator == NULL
9298 ? -1 : curr_state->originator->unique_num),
9299 curr_state->cost,
9300 curr_state->before_nops_num, curr_state->after_nops_num,
9301 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9302 curr_state->middle_bundle_stops,
9303 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9304 INSN_UID (insn));
9306 /* Find the position in the current bundle window. The window can
9307 contain at most two bundles. A two-bundle window means that
9308 the processor will make two bundle rotations. */
9309 max_pos = get_max_pos (curr_state->dfa_state);
9310 if (max_pos == 6
9311 /* The following (negative template number) means that the
9312 processor did one bundle rotation. */
9313 || (max_pos == 3 && template0 < 0))
9315 /* We are at the end of the window -- find template(s) for
9316 its bundle(s). */
9317 pos = max_pos;
9318 if (max_pos == 3)
9319 template0 = get_template (curr_state->dfa_state, 3);
9320 else
9322 template1 = get_template (curr_state->dfa_state, 3);
9323 template0 = get_template (curr_state->dfa_state, 6);
9326 if (max_pos > 3 && template1 < 0)
9327 /* It may happen when we have the stop inside a bundle. */
9329 gcc_assert (pos <= 3);
9330 template1 = get_template (curr_state->dfa_state, 3);
9331 pos += 3;
9333 if (!asm_p)
9334 /* Emit nops after the current insn. */
9335 for (i = 0; i < curr_state->after_nops_num; i++)
9337 rtx nop_pat = gen_nop ();
9338 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9339 pos--;
9340 gcc_assert (pos >= 0);
9341 if (pos % 3 == 0)
9343 /* We are at the start of a bundle: emit the template
9344 (it should be defined). */
9345 gcc_assert (template0 >= 0);
9346 ia64_add_bundle_selector_before (template0, nop);
9347 /* If we have a two-bundle window, we make one bundle
9348 rotation. Otherwise template0 will be undefined
9349 (negative value). */
9350 template0 = template1;
9351 template1 = -1;
9354 /* Move the position backward in the window. A group barrier has
9355 no slot. An asm insn takes a whole bundle. */
9356 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9357 && !unknown_for_bundling_p (insn))
9358 pos--;
9359 /* A long insn takes 2 slots. */
9360 if (ia64_safe_type (insn) == TYPE_L)
9361 pos--;
9362 gcc_assert (pos >= 0);
9363 if (pos % 3 == 0
9364 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9365 && !unknown_for_bundling_p (insn))
9367 /* The current insn is at the bundle start: emit the
9368 template. */
9369 gcc_assert (template0 >= 0);
9370 ia64_add_bundle_selector_before (template0, insn);
9371 b = PREV_INSN (insn);
9372 insn = b;
9373 /* See comment above in analogous place for emitting nops
9374 after the insn. */
9375 template0 = template1;
9376 template1 = -1;
9378 /* Emit nops before the current insn. */
9379 for (i = 0; i < curr_state->before_nops_num; i++)
9381 rtx nop_pat = gen_nop ();
9382 ia64_emit_insn_before (nop_pat, insn);
9383 rtx_insn *nop = PREV_INSN (insn);
9384 insn = nop;
9385 pos--;
9386 gcc_assert (pos >= 0);
9387 if (pos % 3 == 0)
9389 /* See comment above in analogous place for emitting nops
9390 after the insn. */
9391 gcc_assert (template0 >= 0);
9392 ia64_add_bundle_selector_before (template0, insn);
9393 b = PREV_INSN (insn);
9394 insn = b;
9395 template0 = template1;
9396 template1 = -1;
9401 if (flag_checking)
9403 /* Assert that middle_bundle_stops was calculated correctly. */
9404 int num = best_state->middle_bundle_stops;
9405 bool start_bundle = true, end_bundle = false;
9407 for (insn = NEXT_INSN (prev_head_insn);
9408 insn && insn != tail;
9409 insn = NEXT_INSN (insn))
9411 if (!INSN_P (insn))
9412 continue;
9413 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9414 start_bundle = true;
9415 else
9417 rtx_insn *next_insn;
9419 for (next_insn = NEXT_INSN (insn);
9420 next_insn && next_insn != tail;
9421 next_insn = NEXT_INSN (next_insn))
9422 if (INSN_P (next_insn)
9423 && (ia64_safe_itanium_class (next_insn)
9424 != ITANIUM_CLASS_IGNORE
9425 || recog_memoized (next_insn)
9426 == CODE_FOR_bundle_selector)
9427 && GET_CODE (PATTERN (next_insn)) != USE
9428 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9429 break;
9431 end_bundle = next_insn == NULL_RTX
9432 || next_insn == tail
9433 || (INSN_P (next_insn)
9434 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9435 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9436 && !start_bundle && !end_bundle
9437 && next_insn
9438 && !unknown_for_bundling_p (next_insn))
9439 num--;
9441 start_bundle = false;
9445 gcc_assert (num == 0);
9448 free (index_to_bundle_states);
9449 finish_bundle_state_table ();
9450 bundling_p = 0;
9451 dfa_clean_insn_cache ();
9454 /* The following function is called at the end of scheduling BB or
9455 EBB. After reload, it inserts stop bits and does insn bundling. */
9457 static void
9458 ia64_sched_finish (FILE *dump, int sched_verbose)
9460 if (sched_verbose)
9461 fprintf (dump, "// Finishing schedule.\n");
9462 if (!reload_completed)
9463 return;
9464 if (reload_completed)
9466 final_emit_insn_group_barriers (dump);
9467 bundling (dump, sched_verbose, current_sched_info->prev_head,
9468 current_sched_info->next_tail);
9469 if (sched_verbose && dump)
9470 fprintf (dump, "// finishing %d-%d\n",
9471 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9472 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9474 return;
9478 /* The following function inserts stop bits in scheduled BB or EBB. */
9480 static void
9481 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9483 rtx_insn *insn;
9484 int need_barrier_p = 0;
9485 int seen_good_insn = 0;
9487 init_insn_group_barriers ();
9489 for (insn = NEXT_INSN (current_sched_info->prev_head);
9490 insn != current_sched_info->next_tail;
9491 insn = NEXT_INSN (insn))
9493 if (BARRIER_P (insn))
9495 rtx_insn *last = prev_active_insn (insn);
9497 if (! last)
9498 continue;
9499 if (JUMP_TABLE_DATA_P (last))
9500 last = prev_active_insn (last);
9501 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9502 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9504 init_insn_group_barriers ();
9505 seen_good_insn = 0;
9506 need_barrier_p = 0;
9508 else if (NONDEBUG_INSN_P (insn))
9510 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9512 init_insn_group_barriers ();
9513 seen_good_insn = 0;
9514 need_barrier_p = 0;
9516 else if (need_barrier_p || group_barrier_needed (insn)
9517 || (mflag_sched_stop_bits_after_every_cycle
9518 && GET_MODE (insn) == TImode
9519 && seen_good_insn))
9521 if (TARGET_EARLY_STOP_BITS)
9523 rtx_insn *last;
9525 for (last = insn;
9526 last != current_sched_info->prev_head;
9527 last = PREV_INSN (last))
9528 if (INSN_P (last) && GET_MODE (last) == TImode
9529 && stops_p [INSN_UID (last)])
9530 break;
9531 if (last == current_sched_info->prev_head)
9532 last = insn;
9533 last = prev_active_insn (last);
9534 if (last
9535 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9536 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9537 last);
9538 init_insn_group_barriers ();
9539 for (last = NEXT_INSN (last);
9540 last != insn;
9541 last = NEXT_INSN (last))
9542 if (INSN_P (last))
9544 group_barrier_needed (last);
9545 if (recog_memoized (last) >= 0
9546 && important_for_bundling_p (last))
9547 seen_good_insn = 1;
9550 else
9552 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9553 insn);
9554 init_insn_group_barriers ();
9555 seen_good_insn = 0;
9557 group_barrier_needed (insn);
9558 if (recog_memoized (insn) >= 0
9559 && important_for_bundling_p (insn))
9560 seen_good_insn = 1;
9562 else if (recog_memoized (insn) >= 0
9563 && important_for_bundling_p (insn))
9564 seen_good_insn = 1;
9565 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9572 /* The following function returns the lookahead depth used by the DFA
9573 insn scheduler for multipass scheduling of the first cycle. */
9575 static int
9576 ia64_first_cycle_multipass_dfa_lookahead (void)
9578 return (reload_completed ? 6 : 4);
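/* The lookahead depth is 6 after reload -- matching the two-bundle,
   six-slot issue window modelled by get_max_pos above -- and a cheaper
   4 before reload, presumably to limit compile time while the schedule
   is still approximate.  */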
9581 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
9583 static void
9584 ia64_init_dfa_pre_cycle_insn (void)
9586 if (temp_dfa_state == NULL)
9588 dfa_state_size = state_size ();
9589 temp_dfa_state = xmalloc (dfa_state_size);
9590 prev_cycle_state = xmalloc (dfa_state_size);
9592 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9593 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9594 recog_memoized (dfa_pre_cycle_insn);
9595 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9596 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9597 recog_memoized (dfa_stop_insn);
9600 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9601 used by the DFA insn scheduler. */
9603 static rtx
9604 ia64_dfa_pre_cycle_insn (void)
9606 return dfa_pre_cycle_insn;
9609 /* The following function returns TRUE if PRODUCER (of type ilog or
9610 ld) produces the address for CONSUMER (of type st or stf). */
9613 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9615 rtx dest, reg, mem;
9617 gcc_assert (producer && consumer);
9618 dest = ia64_single_set (producer);
9619 gcc_assert (dest);
9620 reg = SET_DEST (dest);
9621 gcc_assert (reg);
9622 if (GET_CODE (reg) == SUBREG)
9623 reg = SUBREG_REG (reg);
9624 gcc_assert (GET_CODE (reg) == REG);
9626 dest = ia64_single_set (consumer);
9627 gcc_assert (dest);
9628 mem = SET_DEST (dest);
9629 gcc_assert (mem && GET_CODE (mem) == MEM);
9630 return reg_mentioned_p (reg, mem);
9633 /* The following function returns TRUE if PRODUCER (of type ilog or
9634 ld) produces address for CONSUMER (of type ld or fld). */
9637 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9639 rtx dest, src, reg, mem;
9641 gcc_assert (producer && consumer);
9642 dest = ia64_single_set (producer);
9643 gcc_assert (dest);
9644 reg = SET_DEST (dest);
9645 gcc_assert (reg);
9646 if (GET_CODE (reg) == SUBREG)
9647 reg = SUBREG_REG (reg);
9648 gcc_assert (GET_CODE (reg) == REG);
9650 src = ia64_single_set (consumer);
9651 gcc_assert (src);
9652 mem = SET_SRC (src);
9653 gcc_assert (mem);
9655 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9656 mem = XVECEXP (mem, 0, 0);
9657 else if (GET_CODE (mem) == IF_THEN_ELSE)
9658 /* ??? Is this bypass necessary for ld.c? */
9660 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9661 mem = XEXP (mem, 1);
9664 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9665 mem = XEXP (mem, 0);
9667 if (GET_CODE (mem) == UNSPEC)
9669 int c = XINT (mem, 1);
9671 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9672 || c == UNSPEC_LDSA);
9673 mem = XVECEXP (mem, 0, 0);
9676 /* Note that LO_SUM is used for GOT loads. */
9677 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9679 return reg_mentioned_p (reg, mem);
9682 /* The following function returns TRUE if INSN produces an address for a
9683 load/store insn. We will place such insns in an M slot because that
9684 decreases their latency. */
9687 ia64_produce_address_p (rtx insn)
9689 return insn->call;
9693 /* Emit pseudo-ops for the assembler to describe predicate relations.
9694 At present this assumes that we only consider predicate pairs to
9695 be mutex, and that the assembler can deduce proper values from
9696 straight-line code. */
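/* Without this information the assembler's dependency-violation checking
   would treat two writes to the same register under complementary
   predicates in one instruction group as a conflict; declaring the
   predicate pair mutex tells it that at most one of the two executes.  */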
9698 static void
9699 emit_predicate_relation_info (void)
9701 basic_block bb;
9703 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9705 int r;
9706 rtx_insn *head = BB_HEAD (bb);
9708 /* We only need such notes at code labels. */
9709 if (! LABEL_P (head))
9710 continue;
9711 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9712 head = NEXT_INSN (head);
9714 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9715 grabbing the entire block of predicate registers. */
9716 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9717 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9719 rtx p = gen_rtx_REG (BImode, r);
9720 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9721 if (head == BB_END (bb))
9722 BB_END (bb) = n;
9723 head = n;
9727 /* Look for conditional calls that do not return, and protect predicate
9728 relations around them. Otherwise the assembler will assume the call
9729 returns, and complain about uses of call-clobbered predicates after
9730 the call. */
9731 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9733 rtx_insn *insn = BB_HEAD (bb);
9735 while (1)
9737 if (CALL_P (insn)
9738 && GET_CODE (PATTERN (insn)) == COND_EXEC
9739 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9741 rtx_insn *b =
9742 emit_insn_before (gen_safe_across_calls_all (), insn);
9743 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9744 if (BB_HEAD (bb) == insn)
9745 BB_HEAD (bb) = b;
9746 if (BB_END (bb) == insn)
9747 BB_END (bb) = a;
9750 if (insn == BB_END (bb))
9751 break;
9752 insn = NEXT_INSN (insn);
9757 /* Perform machine dependent operations on the rtl chain INSNS. */
9759 static void
9760 ia64_reorg (void)
9762 /* We are freeing block_for_insn in the toplev to keep compatibility
9763 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9764 compute_bb_for_insn ();
9766 /* If optimizing, we'll have split before scheduling. */
9767 if (optimize == 0)
9768 split_all_insns ();
9770 if (optimize && flag_schedule_insns_after_reload
9771 && dbg_cnt (ia64_sched2))
9773 basic_block bb;
9774 timevar_push (TV_SCHED2);
9775 ia64_final_schedule = 1;
9777 /* We can't let modulo-sched prevent us from scheduling any bbs,
9778 since we need the final schedule to produce bundle information. */
9779 FOR_EACH_BB_FN (bb, cfun)
9780 bb->flags &= ~BB_DISABLE_SCHEDULE;
9782 initiate_bundle_states ();
9783 ia64_nop = make_insn_raw (gen_nop ());
9784 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9785 recog_memoized (ia64_nop);
9786 clocks_length = get_max_uid () + 1;
9787 stops_p = XCNEWVEC (char, clocks_length);
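/* The CPU unit codes looked up below come from the Itanium DFA
   (automaton) descriptions in this port: the pos_* units track the issue
   slot within a cycle, while the _0xxx_ / _1xxx_ units correspond to the
   bundle template chosen for the first and second bundle of the cycle.  */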
9789 if (ia64_tune == PROCESSOR_ITANIUM2)
9791 pos_1 = get_cpu_unit_code ("2_1");
9792 pos_2 = get_cpu_unit_code ("2_2");
9793 pos_3 = get_cpu_unit_code ("2_3");
9794 pos_4 = get_cpu_unit_code ("2_4");
9795 pos_5 = get_cpu_unit_code ("2_5");
9796 pos_6 = get_cpu_unit_code ("2_6");
9797 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9798 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9799 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9800 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9801 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9802 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9803 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9804 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9805 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9806 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9807 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9808 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9809 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9810 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9811 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9812 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9813 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9814 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9815 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9816 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9818 else
9820 pos_1 = get_cpu_unit_code ("1_1");
9821 pos_2 = get_cpu_unit_code ("1_2");
9822 pos_3 = get_cpu_unit_code ("1_3");
9823 pos_4 = get_cpu_unit_code ("1_4");
9824 pos_5 = get_cpu_unit_code ("1_5");
9825 pos_6 = get_cpu_unit_code ("1_6");
9826 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9827 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9828 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9829 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9830 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9831 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9832 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9833 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9834 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9835 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9836 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9837 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9838 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9839 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9840 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9841 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9842 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9843 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9844 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9845 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9848 if (flag_selective_scheduling2
9849 && !maybe_skip_selective_scheduling ())
9850 run_selective_scheduling ();
9851 else
9852 schedule_ebbs ();
9854 /* Redo alignment computation, as it might have gone wrong. */
9855 compute_alignments ();
9857 /* We cannot reuse this one because it has been corrupted by the
9858 evil glat. */
9859 finish_bundle_states ();
9860 free (stops_p);
9861 stops_p = NULL;
9862 emit_insn_group_barriers (dump_file);
9864 ia64_final_schedule = 0;
9865 timevar_pop (TV_SCHED2);
9867 else
9868 emit_all_insn_group_barriers (dump_file);
9870 df_analyze ();
9872 /* A call must not be the last instruction in a function, so that the
9873 return address still lies within the function and unwinding works
9874 properly. Note that IA-64 differs from dwarf2 on this point. */
9875 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9877 rtx_insn *insn;
9878 int saw_stop = 0;
9880 insn = get_last_insn ();
9881 if (! INSN_P (insn))
9882 insn = prev_active_insn (insn);
9883 if (insn)
9885 /* Skip over insns that expand to nothing. */
9886 while (NONJUMP_INSN_P (insn)
9887 && get_attr_empty (insn) == EMPTY_YES)
9889 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9890 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9891 saw_stop = 1;
9892 insn = prev_active_insn (insn);
9894 if (CALL_P (insn))
9896 if (! saw_stop)
9897 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9898 emit_insn (gen_break_f ());
9899 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9904 emit_predicate_relation_info ();
9906 if (flag_var_tracking)
9908 timevar_push (TV_VAR_TRACKING);
9909 variable_tracking_main ();
9910 timevar_pop (TV_VAR_TRACKING);
9912 df_finish_pass (false);
9915 /* Return true if REGNO is used by the epilogue. */
9918 ia64_epilogue_uses (int regno)
9920 switch (regno)
9922 case R_GR (1):
9923 /* With a call to a function in another module, we will write a new
9924 value to "gp". After returning from such a call, we need to make
9925 sure the function restores the original gp-value, even if the
9926 function itself does not use the gp anymore. */
9927 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9929 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9930 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9931 /* For functions defined with the syscall_linkage attribute, all
9932 input registers are marked as live at all function exits. This
9933 prevents the register allocator from using the input registers,
9934 which in turn makes it possible to restart a system call after
9935 an interrupt without having to save/restore the input registers.
9936 This also prevents kernel data from leaking to application code. */
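/* A hypothetical declaration using that attribute might look like

       extern long do_syscall (long, long)
         __attribute__ ((syscall_linkage));  */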
9937 return lookup_attribute ("syscall_linkage",
9938 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9940 case R_BR (0):
9941 /* Conditional return patterns can't represent the use of `b0' as
9942 the return address, so we force the value live this way. */
9943 return 1;
9945 case AR_PFS_REGNUM:
9946 /* Likewise for ar.pfs, which is used by br.ret. */
9947 return 1;
9949 default:
9950 return 0;
9954 /* Return true if REGNO is used by the frame unwinder. */
9957 ia64_eh_uses (int regno)
9959 unsigned int r;
9961 if (! reload_completed)
9962 return 0;
9964 if (regno == 0)
9965 return 0;
9967 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9968 if (regno == current_frame_info.r[r]
9969 || regno == emitted_frame_related_regs[r])
9970 return 1;
9972 return 0;
9975 /* Return true if this goes in small data/bss. */
9977 /* ??? We could also support our own long data here, generating
9978 movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but
9979 should make it faster because there is one less load. That would also
9980 cover incomplete types, which can't go in sdata/sbss. */
9982 static bool
9983 ia64_in_small_data_p (const_tree exp)
9985 if (TARGET_NO_SDATA)
9986 return false;
9988 /* We want to merge strings, so we never consider them small data. */
9989 if (TREE_CODE (exp) == STRING_CST)
9990 return false;
9992 /* Functions are never small data. */
9993 if (TREE_CODE (exp) == FUNCTION_DECL)
9994 return false;
9996 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9998 const char *section = DECL_SECTION_NAME (exp);
10000 if (strcmp (section, ".sdata") == 0
10001 || strncmp (section, ".sdata.", 7) == 0
10002 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
10003 || strcmp (section, ".sbss") == 0
10004 || strncmp (section, ".sbss.", 6) == 0
10005 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
10006 return true;
10008 else
10010 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10012 /* If this is an incomplete type with size 0, then we can't put it
10013 in sdata because it might be too big when completed. */
10014 if (size > 0 && size <= ia64_section_threshold)
10015 return true;
10018 return false;
10021 /* Output assembly directives for prologue regions. */
10023 /* True if the current basic block is the last one in the function. */
10025 static bool last_block;
10027 /* True if we need a copy_state command at the start of the next block. */
10029 static bool need_copy_state;
10031 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
10032 # define MAX_ARTIFICIAL_LABEL_BYTES 30
10033 #endif
10035 /* The function emits unwind directives for the start of an epilogue. */
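/* For a block that is not the last one this typically produces

       .label_state 1
       .restore sp

   and the next block is then prefixed with ".body" and ".copy_state 1"
   by ia64_asm_unwind_emit below.  */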
10037 static void
10038 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10039 bool unwind, bool frame ATTRIBUTE_UNUSED)
10041 /* If this isn't the last block of the function, then we need to label the
10042 current state, and copy it back in at the start of the next block. */
10044 if (!last_block)
10046 if (unwind)
10047 fprintf (asm_out_file, "\t.label_state %d\n",
10048 ++cfun->machine->state_num);
10049 need_copy_state = true;
10052 if (unwind)
10053 fprintf (asm_out_file, "\t.restore sp\n");
10056 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
10058 static void
10059 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10060 bool unwind, bool frame)
10062 rtx dest = SET_DEST (pat);
10063 rtx src = SET_SRC (pat);
10065 if (dest == stack_pointer_rtx)
10067 if (GET_CODE (src) == PLUS)
10069 rtx op0 = XEXP (src, 0);
10070 rtx op1 = XEXP (src, 1);
10072 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10074 if (INTVAL (op1) < 0)
10076 gcc_assert (!frame_pointer_needed);
10077 if (unwind)
10078 fprintf (asm_out_file,
10079 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10080 -INTVAL (op1));
10082 else
10083 process_epilogue (asm_out_file, insn, unwind, frame);
10085 else
10087 gcc_assert (src == hard_frame_pointer_rtx);
10088 process_epilogue (asm_out_file, insn, unwind, frame);
10091 else if (dest == hard_frame_pointer_rtx)
10093 gcc_assert (src == stack_pointer_rtx);
10094 gcc_assert (frame_pointer_needed);
10096 if (unwind)
10097 fprintf (asm_out_file, "\t.vframe r%d\n",
10098 ia64_dbx_register_number (REGNO (dest)));
10100 else
10101 gcc_unreachable ();
10104 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10106 static void
10107 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10109 rtx dest = SET_DEST (pat);
10110 rtx src = SET_SRC (pat);
10111 int dest_regno = REGNO (dest);
10112 int src_regno;
10114 if (src == pc_rtx)
10116 /* Saving return address pointer. */
10117 if (unwind)
10118 fprintf (asm_out_file, "\t.save rp, r%d\n",
10119 ia64_dbx_register_number (dest_regno));
10120 return;
10123 src_regno = REGNO (src);
10125 switch (src_regno)
10127 case PR_REG (0):
10128 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10129 if (unwind)
10130 fprintf (asm_out_file, "\t.save pr, r%d\n",
10131 ia64_dbx_register_number (dest_regno));
10132 break;
10134 case AR_UNAT_REGNUM:
10135 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10136 if (unwind)
10137 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10138 ia64_dbx_register_number (dest_regno));
10139 break;
10141 case AR_LC_REGNUM:
10142 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10143 if (unwind)
10144 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10145 ia64_dbx_register_number (dest_regno));
10146 break;
10148 default:
10149 /* Everything else should indicate being stored to memory. */
10150 gcc_unreachable ();
10154 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10156 static void
10157 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10159 rtx dest = SET_DEST (pat);
10160 rtx src = SET_SRC (pat);
10161 int src_regno = REGNO (src);
10162 const char *saveop;
10163 HOST_WIDE_INT off;
10164 rtx base;
10166 gcc_assert (MEM_P (dest));
10167 if (GET_CODE (XEXP (dest, 0)) == REG)
10169 base = XEXP (dest, 0);
10170 off = 0;
10172 else
10174 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10175 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10176 base = XEXP (XEXP (dest, 0), 0);
10177 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10180 if (base == hard_frame_pointer_rtx)
10182 saveop = ".savepsp";
10183 off = - off;
10185 else
10187 gcc_assert (base == stack_pointer_rtx);
10188 saveop = ".savesp";
10191 src_regno = REGNO (src);
10192 switch (src_regno)
10194 case BR_REG (0):
10195 gcc_assert (!current_frame_info.r[reg_save_b0]);
10196 if (unwind)
10197 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10198 saveop, off);
10199 break;
10201 case PR_REG (0):
10202 gcc_assert (!current_frame_info.r[reg_save_pr]);
10203 if (unwind)
10204 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10205 saveop, off);
10206 break;
10208 case AR_LC_REGNUM:
10209 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10210 if (unwind)
10211 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10212 saveop, off);
10213 break;
10215 case AR_PFS_REGNUM:
10216 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10217 if (unwind)
10218 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10219 saveop, off);
10220 break;
10222 case AR_UNAT_REGNUM:
10223 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10224 if (unwind)
10225 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10226 saveop, off);
10227 break;
10229 case GR_REG (4):
10230 case GR_REG (5):
10231 case GR_REG (6):
10232 case GR_REG (7):
10233 if (unwind)
10234 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10235 1 << (src_regno - GR_REG (4)));
10236 break;
10238 case BR_REG (1):
10239 case BR_REG (2):
10240 case BR_REG (3):
10241 case BR_REG (4):
10242 case BR_REG (5):
10243 if (unwind)
10244 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10245 1 << (src_regno - BR_REG (1)));
10246 break;
10248 case FR_REG (2):
10249 case FR_REG (3):
10250 case FR_REG (4):
10251 case FR_REG (5):
10252 if (unwind)
10253 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10254 1 << (src_regno - FR_REG (2)));
10255 break;
10257 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10258 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10259 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10260 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10261 if (unwind)
10262 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10263 1 << (src_regno - FR_REG (12)));
10264 break;
10266 default:
10267 /* ??? For some reason we mark other general registers, even those
10268 we can't represent in the unwind info. Ignore them. */
10269 break;
10273 /* This function looks at a single insn and emits any directives
10274 required to unwind this insn. */
10276 static void
10277 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10279 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10280 bool frame = dwarf2out_do_frame ();
10281 rtx note, pat;
10282 bool handled_one;
10284 if (!unwind && !frame)
10285 return;
10287 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10289 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10290 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10292 /* Restore unwind state from immediately before the epilogue. */
10293 if (need_copy_state)
10295 if (unwind)
10297 fprintf (asm_out_file, "\t.body\n");
10298 fprintf (asm_out_file, "\t.copy_state %d\n",
10299 cfun->machine->state_num);
10301 need_copy_state = false;
10305 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10306 return;
10308 /* Look for the ALLOC insn. */
10309 if (INSN_CODE (insn) == CODE_FOR_alloc)
10311 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10312 int dest_regno = REGNO (dest);
10314 /* If this is the final destination for ar.pfs, then this must
10315 be the alloc in the prologue. */
10316 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10318 if (unwind)
10319 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10320 ia64_dbx_register_number (dest_regno));
10322 else
10324 /* This must be an alloc before a sibcall. We must drop the
10325 old frame info. The easiest way to drop the old frame
10326 info is to ensure we had a ".restore sp" directive
10327 followed by a new prologue. If the procedure doesn't
10328 have a memory-stack frame, we'll issue a dummy ".restore
10329 sp" now. */
10330 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10331 /* If we haven't done process_epilogue () yet, do it now. */
10332 process_epilogue (asm_out_file, insn, unwind, frame);
10333 if (unwind)
10334 fprintf (asm_out_file, "\t.prologue\n");
10336 return;
10339 handled_one = false;
10340 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10341 switch (REG_NOTE_KIND (note))
10343 case REG_CFA_ADJUST_CFA:
10344 pat = XEXP (note, 0);
10345 if (pat == NULL)
10346 pat = PATTERN (insn);
10347 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10348 handled_one = true;
10349 break;
10351 case REG_CFA_OFFSET:
10352 pat = XEXP (note, 0);
10353 if (pat == NULL)
10354 pat = PATTERN (insn);
10355 process_cfa_offset (asm_out_file, pat, unwind);
10356 handled_one = true;
10357 break;
10359 case REG_CFA_REGISTER:
10360 pat = XEXP (note, 0);
10361 if (pat == NULL)
10362 pat = PATTERN (insn);
10363 process_cfa_register (asm_out_file, pat, unwind);
10364 handled_one = true;
10365 break;
10367 case REG_FRAME_RELATED_EXPR:
10368 case REG_CFA_DEF_CFA:
10369 case REG_CFA_EXPRESSION:
10370 case REG_CFA_RESTORE:
10371 case REG_CFA_SET_VDRAP:
10372 /* Not used in the ia64 port. */
10373 gcc_unreachable ();
10375 default:
10376 /* Not a frame-related note. */
10377 break;
10380 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10381 explicit action to take. No guessing required. */
10382 gcc_assert (handled_one);
10385 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10387 static void
10388 ia64_asm_emit_except_personality (rtx personality)
10390 fputs ("\t.personality\t", asm_out_file);
10391 output_addr_const (asm_out_file, personality);
10392 fputc ('\n', asm_out_file);
10395 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10397 static void
10398 ia64_asm_init_sections (void)
10400 exception_section = get_unnamed_section (0, output_section_asm_op,
10401 "\t.handlerdata");
10404 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10406 static enum unwind_info_type
10407 ia64_debug_unwind_info (void)
10409 return UI_TARGET;
10412 enum ia64_builtins
10414 IA64_BUILTIN_BSP,
10415 IA64_BUILTIN_COPYSIGNQ,
10416 IA64_BUILTIN_FABSQ,
10417 IA64_BUILTIN_FLUSHRS,
10418 IA64_BUILTIN_INFQ,
10419 IA64_BUILTIN_HUGE_VALQ,
10420 IA64_BUILTIN_NANQ,
10421 IA64_BUILTIN_NANSQ,
10422 IA64_BUILTIN_max
10425 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10427 void
10428 ia64_init_builtins (void)
10430 tree fpreg_type;
10431 tree float80_type;
10432 tree decl;
10434 /* The __fpreg type. */
10435 fpreg_type = make_node (REAL_TYPE);
10436 TYPE_PRECISION (fpreg_type) = 82;
10437 layout_type (fpreg_type);
10438 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10440 /* The __float80 type. */
10441 if (float64x_type_node != NULL_TREE
10442 && TYPE_MODE (float64x_type_node) == XFmode)
10443 float80_type = float64x_type_node;
10444 else
10446 float80_type = make_node (REAL_TYPE);
10447 TYPE_PRECISION (float80_type) = 80;
10448 layout_type (float80_type);
10450 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10452 /* The __float128 type. */
10453 if (!TARGET_HPUX)
10455 tree ftype;
10456 tree const_string_type
10457 = build_pointer_type (build_qualified_type
10458 (char_type_node, TYPE_QUAL_CONST));
10460 (*lang_hooks.types.register_builtin_type) (float128_type_node,
10461 "__float128");
10463 /* TFmode support builtins. */
10464 ftype = build_function_type_list (float128_type_node, NULL_TREE);
10465 decl = add_builtin_function ("__builtin_infq", ftype,
10466 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10467 NULL, NULL_TREE);
10468 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10470 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10471 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10472 NULL, NULL_TREE);
10473 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10475 ftype = build_function_type_list (float128_type_node,
10476 const_string_type,
10477 NULL_TREE);
10478 decl = add_builtin_function ("__builtin_nanq", ftype,
10479 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10480 "nanq", NULL_TREE);
10481 TREE_READONLY (decl) = 1;
10482 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10484 decl = add_builtin_function ("__builtin_nansq", ftype,
10485 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10486 "nansq", NULL_TREE);
10487 TREE_READONLY (decl) = 1;
10488 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10490 ftype = build_function_type_list (float128_type_node,
10491 float128_type_node,
10492 NULL_TREE);
10493 decl = add_builtin_function ("__builtin_fabsq", ftype,
10494 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10495 "__fabstf2", NULL_TREE);
10496 TREE_READONLY (decl) = 1;
10497 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10499 ftype = build_function_type_list (float128_type_node,
10500 float128_type_node,
10501 float128_type_node,
10502 NULL_TREE);
10503 decl = add_builtin_function ("__builtin_copysignq", ftype,
10504 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10505 "__copysigntf3", NULL_TREE);
10506 TREE_READONLY (decl) = 1;
10507 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10509 else
10510 /* Under HPUX, this is a synonym for "long double". */
10511 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10512 "__float128");
10514 /* Fwrite on VMS is non-standard. */
10515 #if TARGET_ABI_OPEN_VMS
10516 vms_patch_builtins ();
10517 #endif
10519 #define def_builtin(name, type, code) \
10520 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10521 NULL, NULL_TREE)
10523 decl = def_builtin ("__builtin_ia64_bsp",
10524 build_function_type_list (ptr_type_node, NULL_TREE),
10525 IA64_BUILTIN_BSP);
10526 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10528 decl = def_builtin ("__builtin_ia64_flushrs",
10529 build_function_type_list (void_type_node, NULL_TREE),
10530 IA64_BUILTIN_FLUSHRS);
10531 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10533 #undef def_builtin
10535 if (TARGET_HPUX)
10537 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10538 set_user_assembler_name (decl, "_Isfinite");
10539 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10540 set_user_assembler_name (decl, "_Isfinitef");
10541 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10542 set_user_assembler_name (decl, "_Isfinitef128");
10546 static tree
10547 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10548 tree *args, bool ignore ATTRIBUTE_UNUSED)
10550 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10552 enum ia64_builtins fn_code = (enum ia64_builtins)
10553 DECL_FUNCTION_CODE (fndecl);
10554 switch (fn_code)
10556 case IA64_BUILTIN_NANQ:
10557 case IA64_BUILTIN_NANSQ:
10559 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10560 const char *str = c_getstr (*args);
10561 int quiet = fn_code == IA64_BUILTIN_NANQ;
10562 REAL_VALUE_TYPE real;
10564 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10565 return build_real (type, real);
10566 return NULL_TREE;
10569 default:
10570 break;
10574 #ifdef SUBTARGET_FOLD_BUILTIN
10575 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10576 #endif
10578 return NULL_TREE;
10582 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10583 machine_mode mode ATTRIBUTE_UNUSED,
10584 int ignore ATTRIBUTE_UNUSED)
10586 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10587 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10589 switch (fcode)
10591 case IA64_BUILTIN_BSP:
10592 if (! target || ! register_operand (target, DImode))
10593 target = gen_reg_rtx (DImode);
10594 emit_insn (gen_bsp_value (target));
10595 #ifdef POINTERS_EXTEND_UNSIGNED
10596 target = convert_memory_address (ptr_mode, target);
10597 #endif
10598 return target;
10600 case IA64_BUILTIN_FLUSHRS:
10601 emit_insn (gen_flushrs ());
10602 return const0_rtx;
10604 case IA64_BUILTIN_INFQ:
10605 case IA64_BUILTIN_HUGE_VALQ:
10607 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10608 REAL_VALUE_TYPE inf;
10609 rtx tmp;
10611 real_inf (&inf);
10612 tmp = const_double_from_real_value (inf, target_mode);
10614 tmp = validize_mem (force_const_mem (target_mode, tmp));
10616 if (target == 0)
10617 target = gen_reg_rtx (target_mode);
10619 emit_move_insn (target, tmp);
10620 return target;
10623 case IA64_BUILTIN_NANQ:
10624 case IA64_BUILTIN_NANSQ:
10625 case IA64_BUILTIN_FABSQ:
10626 case IA64_BUILTIN_COPYSIGNQ:
10627 return expand_call (exp, target, ignore);
10629 default:
10630 gcc_unreachable ();
10633 return NULL_RTX;
10636 /* Return the ia64 builtin for CODE. */
10638 static tree
10639 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10641 if (code >= IA64_BUILTIN_max)
10642 return error_mark_node;
10644 return ia64_builtins[code];
10647 /* Implement TARGET_FUNCTION_ARG_PADDING.
10649 On HP-UX IA64, aggregate parameters are passed in the most
10650 significant bits of the stack slot. */
10652 static pad_direction
10653 ia64_function_arg_padding (machine_mode mode, const_tree type)
10655 /* Exception to normal case for structures/unions/etc. */
10656 if (TARGET_HPUX
10657 && type
10658 && AGGREGATE_TYPE_P (type)
10659 && int_size_in_bytes (type) < UNITS_PER_WORD)
10660 return PAD_UPWARD;
10662 /* Fall back to the default. */
10663 return default_function_arg_padding (mode, type);
10666 /* Emit text to declare externally defined variables and functions, because
10667 the Intel assembler does not support undefined externals. */
10669 void
10670 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10672 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10673 set in order to avoid putting out names that are never really
10674 used. */
10675 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10677 /* maybe_assemble_visibility will return 1 if the assembler
10678 visibility directive is output. */
10679 int need_visibility = ((*targetm.binds_local_p) (decl)
10680 && maybe_assemble_visibility (decl));
10682 /* GNU as does not need anything here, but the HP linker does
10683 need something for external functions. */
10684 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10685 && TREE_CODE (decl) == FUNCTION_DECL)
10686 (*targetm.asm_out.globalize_decl_name) (file, decl);
10687 else if (need_visibility && !TARGET_GNU_AS)
10688 (*targetm.asm_out.globalize_label) (file, name);
10692 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10693 modes of word_mode and larger. Rename the TFmode libfuncs using the
10694 HPUX conventions. __divtf3 is used for XFmode; we need to keep it for
10695 backward compatibility. */
10697 static void
10698 ia64_init_libfuncs (void)
10700 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10701 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10702 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10703 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10705 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10706 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10707 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10708 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10709 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10711 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10712 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10713 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10714 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10715 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10716 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10718 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10719 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10720 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10721 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10722 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10724 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10725 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10726 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10727 /* HP-UX 11.23 libc does not have a function for unsigned
10728 SImode-to-TFmode conversion. */
10729 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10732 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10734 static void
10735 ia64_hpux_init_libfuncs (void)
10737 ia64_init_libfuncs ();
10739 /* The HP SI millicode division and mod functions expect DI arguments.
10740 By turning them off completely we avoid using both libgcc and the
10741 non-standard millicode routines and use the HP DI millicode routines
10742 instead. */
10744 set_optab_libfunc (sdiv_optab, SImode, 0);
10745 set_optab_libfunc (udiv_optab, SImode, 0);
10746 set_optab_libfunc (smod_optab, SImode, 0);
10747 set_optab_libfunc (umod_optab, SImode, 0);
10749 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10750 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10751 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10752 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10754 /* HP-UX libc has TF min/max/abs routines in it. */
10755 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10756 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10757 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10759 /* ia64_expand_compare uses this. */
10760 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10762 /* These should never be used. */
10763 set_optab_libfunc (eq_optab, TFmode, 0);
10764 set_optab_libfunc (ne_optab, TFmode, 0);
10765 set_optab_libfunc (gt_optab, TFmode, 0);
10766 set_optab_libfunc (ge_optab, TFmode, 0);
10767 set_optab_libfunc (lt_optab, TFmode, 0);
10768 set_optab_libfunc (le_optab, TFmode, 0);
10771 /* Rename the division and modulus functions in VMS. */
10773 static void
10774 ia64_vms_init_libfuncs (void)
10776 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10777 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10778 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10779 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10780 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10781 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10782 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10783 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10784 #ifdef MEM_LIBFUNCS_INIT
10785 MEM_LIBFUNCS_INIT;
10786 #endif
10789 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10790 the HPUX conventions. */
10792 static void
10793 ia64_sysv4_init_libfuncs (void)
10795 ia64_init_libfuncs ();
10797 /* These functions are not part of the HPUX TFmode interface. We
10798 use them instead of _U_Qfcmp, which doesn't work the way we
10799 expect. */
10800 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10801 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10802 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10803 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10804 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10805 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10807 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10808 glibc doesn't have them. */
10811 /* Use soft-fp. */
10813 static void
10814 ia64_soft_fp_init_libfuncs (void)
10818 static bool
10819 ia64_vms_valid_pointer_mode (scalar_int_mode mode)
10821 return (mode == SImode || mode == DImode);
10824 /* For HPUX, it is illegal to have relocations in shared segments. */
10826 static int
10827 ia64_hpux_reloc_rw_mask (void)
10829 return 3;
10832 /* For others, relax this so that relocations to local data go in
10833 read-only segments, but we still cannot allow global relocations
10834 in read-only segments. */
10836 static int
10837 ia64_reloc_rw_mask (void)
10839 return flag_pic ? 3 : 2;
10842 /* Return the section to use for X. The only special thing we do here
10843 is to honor small data. */
10845 static section *
10846 ia64_select_rtx_section (machine_mode mode, rtx x,
10847 unsigned HOST_WIDE_INT align)
10849 if (GET_MODE_SIZE (mode) > 0
10850 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10851 && !TARGET_NO_SDATA)
10852 return sdata_section;
10853 else
10854 return default_elf_select_rtx_section (mode, x, align);
10857 static unsigned int
10858 ia64_section_type_flags (tree decl, const char *name, int reloc)
10860 unsigned int flags = 0;
10862 if (strcmp (name, ".sdata") == 0
10863 || strncmp (name, ".sdata.", 7) == 0
10864 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10865 || strncmp (name, ".sdata2.", 8) == 0
10866 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10867 || strcmp (name, ".sbss") == 0
10868 || strncmp (name, ".sbss.", 6) == 0
10869 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10870 flags = SECTION_SMALL;
10872 flags |= default_section_type_flags (decl, name, reloc);
10873 return flags;
10876 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10877 structure type and the address of that value should be passed
10878 in out0, rather than in r8. */
10880 static bool
10881 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10883 tree ret_type = TREE_TYPE (fntype);
10885 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10886 as the structure return address parameter, if the return value
10887 type has a non-trivial copy constructor or destructor. It is not
10888 clear if this same convention should be used for other
10889 programming languages. Until G++ 3.4, we incorrectly used r8 for
10890 these return values. */
10891 return (abi_version_at_least (2)
10892 && ret_type
10893 && TYPE_MODE (ret_type) == BLKmode
10894 && TREE_ADDRESSABLE (ret_type)
10895 && lang_GNU_CXX ());
10898 /* Output the assembler code for a thunk function. THUNK_DECL is the
10899 declaration for the thunk function itself, FUNCTION is the decl for
10900 the target function. DELTA is an immediate constant offset to be
10901 added to THIS. If VCALL_OFFSET is nonzero, the word at
10902 *(*this + vcall_offset) should be added to THIS. */
10904 static void
10905 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10906 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10907 tree function)
10909 rtx this_rtx, funexp;
10910 rtx_insn *insn;
10911 unsigned int this_parmno;
10912 unsigned int this_regno;
10913 rtx delta_rtx;
10915 reload_completed = 1;
10916 epilogue_completed = 1;
10918 /* Set things up as ia64_expand_prologue might. */
10919 last_scratch_gr_reg = 15;
10921 memset (&current_frame_info, 0, sizeof (current_frame_info));
10922 current_frame_info.spill_cfa_off = -16;
10923 current_frame_info.n_input_regs = 1;
10924 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10926 /* Mark the end of the (empty) prologue. */
10927 emit_note (NOTE_INSN_PROLOGUE_END);
10929 /* Figure out whether "this" will be the first parameter (the
10930 typical case) or the second parameter (as happens when the
10931 virtual function returns certain class objects). */
10932 this_parmno
10933 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10934 ? 1 : 0);
10935 this_regno = IN_REG (this_parmno);
10936 if (!TARGET_REG_NAMES)
10937 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10939 this_rtx = gen_rtx_REG (Pmode, this_regno);
10941 /* Apply the constant offset, if required. */
10942 delta_rtx = GEN_INT (delta);
10943 if (TARGET_ILP32)
10945 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10946 REG_POINTER (tmp) = 1;
10947 if (delta && satisfies_constraint_I (delta_rtx))
10949 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10950 delta = 0;
10952 else
10953 emit_insn (gen_ptr_extend (this_rtx, tmp));
10955 if (delta)
10957 if (!satisfies_constraint_I (delta_rtx))
10959 rtx tmp = gen_rtx_REG (Pmode, 2);
10960 emit_move_insn (tmp, delta_rtx);
10961 delta_rtx = tmp;
10963 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10966 /* Apply the offset from the vtable, if required. */
10967 if (vcall_offset)
10969 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10970 rtx tmp = gen_rtx_REG (Pmode, 2);
10972 if (TARGET_ILP32)
10974 rtx t = gen_rtx_REG (ptr_mode, 2);
10975 REG_POINTER (t) = 1;
10976 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10977 if (satisfies_constraint_I (vcall_offset_rtx))
10979 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10980 vcall_offset = 0;
10982 else
10983 emit_insn (gen_ptr_extend (tmp, t));
10985 else
10986 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10988 if (vcall_offset)
10990 if (!satisfies_constraint_J (vcall_offset_rtx))
10992 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10993 emit_move_insn (tmp2, vcall_offset_rtx);
10994 vcall_offset_rtx = tmp2;
10996 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10999 if (TARGET_ILP32)
11000 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
11001 else
11002 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
11004 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
11007 /* Generate a tail call to the target function. */
11008 if (! TREE_USED (function))
11010 assemble_external (function);
11011 TREE_USED (function) = 1;
11013 funexp = XEXP (DECL_RTL (function), 0);
11014 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11015 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11016 insn = get_last_insn ();
11017 SIBLING_CALL_P (insn) = 1;
11019 /* Code generation for calls relies on splitting. */
11020 reload_completed = 1;
11021 epilogue_completed = 1;
11022 try_split (PATTERN (insn), insn, 0);
11024 emit_barrier ();
11026 /* Run just enough of rest_of_compilation to get the insns emitted.
11027 There's not really enough bulk here to make other passes such as
11028 instruction scheduling worthwhile. Note that use_thunk calls
11029 assemble_start_function and assemble_end_function. */
11031 emit_all_insn_group_barriers (NULL);
11032 insn = get_insns ();
11033 shorten_branches (insn);
11034 final_start_function (insn, file, 1);
11035 final (insn, file, 1);
11036 final_end_function ();
11038 reload_completed = 0;
11039 epilogue_completed = 0;
11042 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
11044 static rtx
11045 ia64_struct_value_rtx (tree fntype,
11046 int incoming ATTRIBUTE_UNUSED)
11048 if (TARGET_ABI_OPEN_VMS ||
11049 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
11050 return NULL_RTX;
11051 return gen_rtx_REG (Pmode, GR_REG (8));
11054 static bool
11055 ia64_scalar_mode_supported_p (scalar_mode mode)
11057 switch (mode)
11059 case E_QImode:
11060 case E_HImode:
11061 case E_SImode:
11062 case E_DImode:
11063 case E_TImode:
11064 return true;
11066 case E_SFmode:
11067 case E_DFmode:
11068 case E_XFmode:
11069 case E_RFmode:
11070 return true;
11072 case E_TFmode:
11073 return true;
11075 default:
11076 return false;
11080 static bool
11081 ia64_vector_mode_supported_p (machine_mode mode)
11083 switch (mode)
11085 case E_V8QImode:
11086 case E_V4HImode:
11087 case E_V2SImode:
11088 return true;
11090 case E_V2SFmode:
11091 return true;
11093 default:
11094 return false;
11098 /* Implement the FUNCTION_PROFILER macro. */
11100 void
11101 ia64_output_function_profiler (FILE *file, int labelno)
11103 bool indirect_call;
11105 /* If the function needs a static chain and the static chain
11106 register is r15, we use an indirect call so as to bypass
11107 the PLT stub in case the executable is dynamically linked,
11108 because the stub clobbers r15 as per 5.3.6 of the psABI.
11109 We don't need to do that in non-canonical PIC mode. */
11111 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11113 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11114 indirect_call = true;
11116 else
11117 indirect_call = false;
11119 if (TARGET_GNU_AS)
11120 fputs ("\t.prologue 4, r40\n", file);
11121 else
11122 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11123 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11125 if (NO_PROFILE_COUNTERS)
11126 fputs ("\tmov out3 = r0\n", file);
11127 else
11129 char buf[20];
11130 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11132 if (TARGET_AUTO_PIC)
11133 fputs ("\tmovl out3 = @gprel(", file);
11134 else
11135 fputs ("\taddl out3 = @ltoff(", file);
11136 assemble_name (file, buf);
11137 if (TARGET_AUTO_PIC)
11138 fputs (")\n", file);
11139 else
11140 fputs ("), r1\n", file);
11143 if (indirect_call)
11144 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11145 fputs ("\t;;\n", file);
11147 fputs ("\t.save rp, r42\n", file);
11148 fputs ("\tmov out2 = b0\n", file);
11149 if (indirect_call)
11150 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11151 fputs ("\t.body\n", file);
11152 fputs ("\tmov out1 = r1\n", file);
11153 if (indirect_call)
11155 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11156 fputs ("\tmov b6 = r16\n", file);
11157 fputs ("\tld8 r1 = [r14]\n", file);
11158 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11160 else
11161 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11164 static GTY(()) rtx mcount_func_rtx;
11165 static rtx
11166 gen_mcount_func_rtx (void)
11168 if (!mcount_func_rtx)
11169 mcount_func_rtx = init_one_libfunc ("_mcount");
11170 return mcount_func_rtx;
11173 void
11174 ia64_profile_hook (int labelno)
11176 rtx label, ip;
11178 if (NO_PROFILE_COUNTERS)
11179 label = const0_rtx;
11180 else
11182 char buf[30];
11183 const char *label_name;
11184 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11185 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11186 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11187 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11189 ip = gen_reg_rtx (Pmode);
11190 emit_insn (gen_ip_value (ip));
11191 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11192 VOIDmode,
11193 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11194 ip, Pmode,
11195 label, Pmode);
11198 /* Return the mangling of TYPE if it is an extended fundamental type. */
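/* For example, on a non-HP-UX target "long double" (XFmode) mangles as
   "e" and __float128 (TFmode) as "g", so a hypothetical f (__float128)
   becomes _Z1fg.  */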
11200 static const char *
11201 ia64_mangle_type (const_tree type)
11203 type = TYPE_MAIN_VARIANT (type);
11205 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11206 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11207 return NULL;
11209 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11210 mangled as "e". */
11211 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11212 return "g";
11213 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11214 an extended mangling. Elsewhere, "e" is available since long
11215 double is 80 bits. */
11216 if (TYPE_MODE (type) == XFmode)
11217 return TARGET_HPUX ? "u9__float80" : "e";
11218 if (TYPE_MODE (type) == RFmode)
11219 return "u7__fpreg";
11220 return NULL;
11223 /* Return the diagnostic message string if conversion from FROMTYPE to
11224 TOTYPE is not allowed, NULL otherwise. */
11225 static const char *
11226 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11228 /* Reject nontrivial conversion to or from __fpreg. */
11229 if (TYPE_MODE (fromtype) == RFmode
11230 && TYPE_MODE (totype) != RFmode
11231 && TYPE_MODE (totype) != VOIDmode)
11232 return N_("invalid conversion from %<__fpreg%>");
11233 if (TYPE_MODE (totype) == RFmode
11234 && TYPE_MODE (fromtype) != RFmode)
11235 return N_("invalid conversion to %<__fpreg%>");
11236 return NULL;
11239 /* Return the diagnostic message string if the unary operation OP is
11240 not permitted on TYPE, NULL otherwise. */
11241 static const char *
11242 ia64_invalid_unary_op (int op, const_tree type)
11244 /* Reject operations on __fpreg other than unary + or &. */
11245 if (TYPE_MODE (type) == RFmode
11246 && op != CONVERT_EXPR
11247 && op != ADDR_EXPR)
11248 return N_("invalid operation on %<__fpreg%>");
11249 return NULL;
11252 /* Return the diagnostic message string if the binary operation OP is
11253 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11254 static const char *
11255 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11257 /* Reject operations on __fpreg. */
11258 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11259 return N_("invalid operation on %<__fpreg%>");
11260 return NULL;
11263 /* HP-UX version_id attribute.
11264 For object foo, if the version_id is set to 1234, put out an alias
11265 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
11266 other than an alias statement because it is an illegal symbol name. */
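/* A hypothetical use of the attribute:

       extern int foo __attribute__ ((version_id ("1234")));

   for which the '.alias foo "foo{1234}"' described above is put out.  */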
11268 static tree
11269 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11270 tree name ATTRIBUTE_UNUSED,
11271 tree args,
11272 int flags ATTRIBUTE_UNUSED,
11273 bool *no_add_attrs)
11275 tree arg = TREE_VALUE (args);
11277 if (TREE_CODE (arg) != STRING_CST)
11279 error ("version attribute is not a string");
11280 *no_add_attrs = true;
11281 return NULL_TREE;
11283 return NULL_TREE;
11286 /* Target hook for c_mode_for_suffix. */
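/* This is what allows floating constants to be written with the 'q' and
   'w' suffixes, e.g. (illustrative only)

       __float128 x = 1.0q;
       __float80  y = 1.0w;  */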
11288 static machine_mode
11289 ia64_c_mode_for_suffix (char suffix)
11291 if (suffix == 'q')
11292 return TFmode;
11293 if (suffix == 'w')
11294 return XFmode;
11296 return VOIDmode;
11299 static GTY(()) rtx ia64_dconst_0_5_rtx;
11302 ia64_dconst_0_5 (void)
11304 if (! ia64_dconst_0_5_rtx)
11306 REAL_VALUE_TYPE rv;
11307 real_from_string (&rv, "0.5");
11308 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11310 return ia64_dconst_0_5_rtx;
11313 static GTY(()) rtx ia64_dconst_0_375_rtx;
11316 ia64_dconst_0_375 (void)
11318 if (! ia64_dconst_0_375_rtx)
11320 REAL_VALUE_TYPE rv;
11321 real_from_string (&rv, "0.375");
11322 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11324 return ia64_dconst_0_375_rtx;
11327 static machine_mode
11328 ia64_get_reg_raw_mode (int regno)
11330 if (FR_REGNO_P (regno))
11331 return XFmode;
11332 return default_get_reg_raw_mode(regno);
11335 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11336 anymore. */
11338 bool
11339 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11341 return TARGET_HPUX && mode == TFmode;
11344 /* Always default to .text section until HP-UX linker is fixed. */
11346 ATTRIBUTE_UNUSED static section *
11347 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11348 enum node_frequency freq ATTRIBUTE_UNUSED,
11349 bool startup ATTRIBUTE_UNUSED,
11350 bool exit ATTRIBUTE_UNUSED)
11352 return NULL;
11355 /* Construct (set target (vec_select op0 (parallel perm))) and
11356 return true if that's a valid instruction in the active ISA. */
11358 static bool
11359 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11361 rtx rperm[MAX_VECT_LEN], x;
11362 unsigned i;
11364 for (i = 0; i < nelt; ++i)
11365 rperm[i] = GEN_INT (perm[i]);
11367 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11368 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11369 x = gen_rtx_SET (target, x);
11371 rtx_insn *insn = emit_insn (x);
11372 if (recog_memoized (insn) < 0)
11374 remove_insn (insn);
11375 return false;
11377 return true;
11380 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11382 static bool
11383 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11384 const unsigned char *perm, unsigned nelt)
11386 machine_mode v2mode;
11387 rtx x;
11389 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11390 return false;
11391 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11392 return expand_vselect (target, x, perm, nelt);
11395 /* Try to expand a no-op permutation. */
11397 static bool
11398 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11400 unsigned i, nelt = d->nelt;
11402 for (i = 0; i < nelt; ++i)
11403 if (d->perm[i] != i)
11404 return false;
11406 if (!d->testing_p)
11407 emit_move_insn (d->target, d->op0);
11409 return true;
11412 /* Try to expand D via a shrp instruction. */
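/* shrp (shift right pair) concatenates two 64-bit registers and extracts
   a 64-bit field at a given bit offset, e.g.

       shrp r8 = r9, r10, 16

   so a permutation selecting a contiguous, rotated run of elements can
   be done in one instruction.  */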
11414 static bool
11415 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11417 unsigned i, nelt = d->nelt, shift, mask;
11418 rtx tmp, hi, lo;
11420 /* ??? Don't force V2SFmode into the integer registers. */
11421 if (d->vmode == V2SFmode)
11422 return false;
11424 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11426 shift = d->perm[0];
11427 if (BYTES_BIG_ENDIAN && shift > nelt)
11428 return false;
11430 for (i = 1; i < nelt; ++i)
11431 if (d->perm[i] != ((shift + i) & mask))
11432 return false;
11434 if (d->testing_p)
11435 return true;
11437 hi = shift < nelt ? d->op1 : d->op0;
11438 lo = shift < nelt ? d->op0 : d->op1;
11440 shift %= nelt;
11442 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11444 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11445 gcc_assert (IN_RANGE (shift, 1, 63));
11447 /* Recall that big-endian elements are numbered starting at the top of
11448 the register. Ideally we'd have a shift-left-pair. But since we
11449 don't, convert to a shift the other direction. */
11450 if (BYTES_BIG_ENDIAN)
11451 shift = 64 - shift;
11453 tmp = gen_reg_rtx (DImode);
11454 hi = gen_lowpart (DImode, hi);
11455 lo = gen_lowpart (DImode, lo);
11456 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11458 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11459 return true;
11462 /* Try to instantiate D in a single instruction. */
11464 static bool
11465 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11467 unsigned i, nelt = d->nelt;
11468 unsigned char perm2[MAX_VECT_LEN];
11470 /* Try single-operand selections. */
11471 if (d->one_operand_p)
11473 if (expand_vec_perm_identity (d))
11474 return true;
11475 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11476 return true;
11479 /* Try two operand selections. */
11480 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11481 return true;
11483 /* Recognize interleave style patterns with reversed operands. */
11484 if (!d->one_operand_p)
11486 for (i = 0; i < nelt; ++i)
11488 unsigned e = d->perm[i];
11489 if (e >= nelt)
11490 e -= nelt;
11491 else
11492 e += nelt;
11493 perm2[i] = e;
11496 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11497 return true;
11500 if (expand_vec_perm_shrp (d))
11501 return true;
11503 /* ??? Look for deposit-like permutations where most of the result
11504 comes from one vector unchanged and the rest comes from a
11505 sequential hunk of the other vector. */
11507 return false;
11510 /* Pattern match broadcast permutations. */
11512 static bool
11513 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11515 unsigned i, elt, nelt = d->nelt;
11516 unsigned char perm2[2];
11517 rtx temp;
11518 bool ok;
11520 if (!d->one_operand_p)
11521 return false;
11523 elt = d->perm[0];
11524 for (i = 1; i < nelt; ++i)
11525 if (d->perm[i] != elt)
11526 return false;
11528 switch (d->vmode)
11530 case E_V2SImode:
11531 case E_V2SFmode:
11532 /* Implementable by interleave. */
11533 perm2[0] = elt;
11534 perm2[1] = elt + 2;
11535 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11536 gcc_assert (ok);
11537 break;
11539 case E_V8QImode:
11540 /* Implementable by extract + broadcast. */
11541 if (BYTES_BIG_ENDIAN)
11542 elt = 7 - elt;
11543 elt *= BITS_PER_UNIT;
11544 temp = gen_reg_rtx (DImode);
11545 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11546 GEN_INT (8), GEN_INT (elt)));
11547 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11548 break;
11550 case E_V4HImode:
11551 /* Should have been matched directly by vec_select. */
11552 default:
11553 gcc_unreachable ();
11556 return true;
11559 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11560 two vector permutation into a single vector permutation by using
11561 an interleave operation to merge the vectors. */
11563 static bool
11564 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11566 struct expand_vec_perm_d dremap, dfinal;
11567 unsigned char remap[2 * MAX_VECT_LEN];
11568 unsigned contents, i, nelt, nelt2;
11569 unsigned h0, h1, h2, h3;
11570 rtx_insn *seq;
11571 bool ok;
11573 if (d->one_operand_p)
11574 return false;
11576 nelt = d->nelt;
11577 nelt2 = nelt / 2;
11579 /* Examine where the elements come from. */
11580 contents = 0;
11581 for (i = 0; i < nelt; ++i)
11582 contents |= 1u << d->perm[i];
11584 memset (remap, 0xff, sizeof (remap));
11585 dremap = *d;
11587 h0 = (1u << nelt2) - 1;
11588 h1 = h0 << nelt2;
11589 h2 = h0 << nelt;
11590 h3 = h0 << (nelt + nelt2);
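  /* h0/h1 select the low/high half of op0's element indices and h2/h3
     the low/high half of op1's; e.g. for V4HImode they are 0x3, 0xc,
     0x30 and 0xc0 over the concatenated index space 0..7.  */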
  if ((contents & (h0 | h2)) == contents)   /* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & (h1 | h3)) == contents)   /* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0x5555) == contents)   /* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0xaaaa) == contents)   /* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i | 1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt)   /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i + shift) & (2 * nelt - 1);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else
    return false;
  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */
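/* For example, { 0, 5, 2, 7 } gives perm2 = { 0, 1, 2, 3 } and the merge
   mask { -1, 0, -1, 0 }, so the result takes lanes 0 and 2 from the
   permuted op0 and lanes 1 and 3 from the permuted op1.  */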
static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */
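/* The strategies are tried roughly in order of increasing cost: a single
   instruction first, then broadcast, interleave-plus-permute, and finally
   the multi-insn V4HImode sequence.  */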
static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}
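/* Expand a constant vector permutation: OPERANDS[0] is the target,
   OPERANDS[1] and OPERANDS[2] the input vectors and OPERANDS[3] the
   CONST_VECTOR selector.  Return true if the permutation was expanded.  */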
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
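      /* E.g., with identical operands { 0, 5, 2, 7 } folds to the
         identity { 0, 1, 2, 3 }, since element 5 of (op0, op0) is just
         op0[1].  */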
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Copy the selector SEL into the permutation array in D and note
     which of the two input vectors it references.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Record whether the permutation really needs both input operands.  */
  d.one_operand_p = (which != 3);

  /* Go through the motions and see if we can figure out how to
     generate the requested permutation.  */
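  /* The target and operands are only dummy registers here; with
     d.testing_p set, anything emitted between start_sequence and
     end_sequence below is discarded rather than added to the insn
     stream.  */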
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
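/* Expand a vec_set on V2SFmode: OPERANDS[1] is packed into a fresh
   V2SFmode register with fpack and then combined with the existing
   value of OPERANDS[0] by a two-operand permutation chosen from the
   element number in OPERANDS[2].  */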
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
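/* Extract the even-numbered (ODD == 0) or odd-numbered (ODD == 1)
   elements of the concatenation of OP0 and OP1 into TARGET; e.g. for
   V4HImode and ODD == 1 the selector is { 1, 3, 5, 7 }.  */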
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In BR regs, the mode can't be changed at all: they only hold DImode.
   In FP regs, we can't change FP values to integer values and vice
   versa, but we can change e.g. DImode to SImode, and V2SFmode into
   DImode.  */
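/* For example, an SFmode value living in an FR register cannot be
   accessed as SImode, while the same mode change is fine in a general
   register.  */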
static bool
ia64_can_change_mode_class (machine_mode from, machine_mode to,
                            reg_class_t rclass)
{
  if (reg_classes_intersect_p (rclass, BR_REGS))
    return from == to;
  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
    return !reg_classes_intersect_p (rclass, FR_REGS);
  return true;
}

#include "gt-ia64.h"