[official-gcc.git] / gcc / config / ia64 / ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stringpool.h"
32 #include "stor-layout.h"
33 #include "calls.h"
34 #include "varasm.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "recog.h"
43 #include "function.h"
44 #include "expmed.h"
45 #include "dojump.h"
46 #include "explow.h"
47 #include "emit-rtl.h"
48 #include "stmt.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "except.h"
53 #include "predict.h"
54 #include "dominance.h"
55 #include "cfg.h"
56 #include "cfgrtl.h"
57 #include "cfganal.h"
58 #include "lcm.h"
59 #include "cfgbuild.h"
60 #include "cfgcleanup.h"
61 #include "basic-block.h"
62 #include "libfuncs.h"
63 #include "diagnostic-core.h"
64 #include "sched-int.h"
65 #include "timevar.h"
66 #include "target.h"
67 #include "common/common-target.h"
68 #include "tm_p.h"
69 #include "langhooks.h"
70 #include "tree-ssa-alias.h"
71 #include "internal-fn.h"
72 #include "gimple-fold.h"
73 #include "tree-eh.h"
74 #include "gimple-expr.h"
75 #include "gimple.h"
76 #include "gimplify.h"
77 #include "intl.h"
78 #include "df.h"
79 #include "debug.h"
80 #include "params.h"
81 #include "dbgcnt.h"
82 #include "tm-constrs.h"
83 #include "sel-sched.h"
84 #include "reload.h"
85 #include "opts.h"
86 #include "dumpfile.h"
87 #include "builtins.h"
89 #include "target-def.h"
91 /* This is used for communication between ASM_OUTPUT_LABEL and
92 ASM_OUTPUT_LABELREF. */
93 int ia64_asm_output_label = 0;
95 /* Register names for ia64_expand_prologue. */
96 static const char * const ia64_reg_numbers[96] =
97 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
98 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
99 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
100 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
101 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
102 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
103 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
104 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
105 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
106 "r104","r105","r106","r107","r108","r109","r110","r111",
107 "r112","r113","r114","r115","r116","r117","r118","r119",
108 "r120","r121","r122","r123","r124","r125","r126","r127"};
110 /* ??? These strings could be shared with REGISTER_NAMES. */
111 static const char * const ia64_input_reg_names[8] =
112 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
114 /* ??? These strings could be shared with REGISTER_NAMES. */
115 static const char * const ia64_local_reg_names[80] =
116 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
117 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
118 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
119 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
120 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
121 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
122 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
123 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
124 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
125 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
127 /* ??? These strings could be shared with REGISTER_NAMES. */
128 static const char * const ia64_output_reg_names[8] =
129 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
131 /* Variables which are this size or smaller are put in the sdata/sbss
132 sections. */
134 unsigned int ia64_section_threshold;
136 /* The following variable is used by the DFA insn scheduler. The value is
137 TRUE if we do insn bundling instead of insn scheduling. */
138 int bundling_p = 0;
140 enum ia64_frame_regs
142 reg_fp,
143 reg_save_b0,
144 reg_save_pr,
145 reg_save_ar_pfs,
146 reg_save_ar_unat,
147 reg_save_ar_lc,
148 reg_save_gp,
149 number_of_ia64_frame_regs
152 /* Structure to be filled in by ia64_compute_frame_size with register
153 save masks and offsets for the current function. */
155 struct ia64_frame_info
157 HOST_WIDE_INT total_size; /* size of the stack frame, not including
158 the caller's scratch area. */
159 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
160 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
161 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
162 HARD_REG_SET mask; /* mask of saved registers. */
163 unsigned int gr_used_mask; /* mask of registers in use as gr spill
164 registers or long-term scratches. */
165 int n_spilled; /* number of spilled registers. */
166 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
167 int n_input_regs; /* number of input registers used. */
168 int n_local_regs; /* number of local registers used. */
169 int n_output_regs; /* number of output registers used. */
170 int n_rotate_regs; /* number of rotating registers used. */
172 char need_regstk; /* true if a .regstk directive needed. */
173 char initialized; /* true if the data is finalized. */
176 /* Current frame information calculated by ia64_compute_frame_size. */
177 static struct ia64_frame_info current_frame_info;
178 /* The actual registers that are emitted. */
179 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
181 static int ia64_first_cycle_multipass_dfa_lookahead (void);
182 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
183 static void ia64_init_dfa_pre_cycle_insn (void);
184 static rtx ia64_dfa_pre_cycle_insn (void);
185 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
186 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
187 static void ia64_h_i_d_extended (void);
188 static void * ia64_alloc_sched_context (void);
189 static void ia64_init_sched_context (void *, bool);
190 static void ia64_set_sched_context (void *);
191 static void ia64_clear_sched_context (void *);
192 static void ia64_free_sched_context (void *);
193 static int ia64_mode_to_int (machine_mode);
194 static void ia64_set_sched_flags (spec_info_t);
195 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
196 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
197 static bool ia64_skip_rtx_p (const_rtx);
198 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
199 static bool ia64_needs_block_p (ds_t);
200 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
201 static int ia64_spec_check_p (rtx);
202 static int ia64_spec_check_src_p (rtx);
203 static rtx gen_tls_get_addr (void);
204 static rtx gen_thread_pointer (void);
205 static int find_gr_spill (enum ia64_frame_regs, int);
206 static int next_scratch_gr_reg (void);
207 static void mark_reg_gr_used_mask (rtx, void *);
208 static void ia64_compute_frame_size (HOST_WIDE_INT);
209 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
210 static void finish_spill_pointers (void);
211 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
212 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
213 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
214 static rtx gen_movdi_x (rtx, rtx, rtx);
215 static rtx gen_fr_spill_x (rtx, rtx, rtx);
216 static rtx gen_fr_restore_x (rtx, rtx, rtx);
218 static void ia64_option_override (void);
219 static bool ia64_can_eliminate (const int, const int);
220 static machine_mode hfa_element_mode (const_tree, bool);
221 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
224 tree, bool);
225 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
226 const_tree, bool, bool);
227 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
228 const_tree, bool);
229 static rtx ia64_function_incoming_arg (cumulative_args_t,
230 machine_mode, const_tree, bool);
231 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
232 const_tree, bool);
233 static unsigned int ia64_function_arg_boundary (machine_mode,
234 const_tree);
235 static bool ia64_function_ok_for_sibcall (tree, tree);
236 static bool ia64_return_in_memory (const_tree, const_tree);
237 static rtx ia64_function_value (const_tree, const_tree, bool);
238 static rtx ia64_libcall_value (machine_mode, const_rtx);
239 static bool ia64_function_value_regno_p (const unsigned int);
240 static int ia64_register_move_cost (machine_mode, reg_class_t,
241 reg_class_t);
242 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
243 bool);
244 static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
245 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
246 static void fix_range (const char *);
247 static struct machine_function * ia64_init_machine_status (void);
248 static void emit_insn_group_barriers (FILE *);
249 static void emit_all_insn_group_barriers (FILE *);
250 static void final_emit_insn_group_barriers (FILE *);
251 static void emit_predicate_relation_info (void);
252 static void ia64_reorg (void);
253 static bool ia64_in_small_data_p (const_tree);
254 static void process_epilogue (FILE *, rtx, bool, bool);
256 static bool ia64_assemble_integer (rtx, unsigned int, int);
257 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
258 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
259 static void ia64_output_function_end_prologue (FILE *);
261 static void ia64_print_operand (FILE *, rtx, int);
262 static void ia64_print_operand_address (FILE *, rtx);
263 static bool ia64_print_operand_punct_valid_p (unsigned char code);
265 static int ia64_issue_rate (void);
266 static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
267 static void ia64_sched_init (FILE *, int, int);
268 static void ia64_sched_init_global (FILE *, int, int);
269 static void ia64_sched_finish_global (FILE *, int);
270 static void ia64_sched_finish (FILE *, int);
271 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
272 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
273 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
274 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
276 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
277 static void ia64_asm_emit_except_personality (rtx);
278 static void ia64_asm_init_sections (void);
280 static enum unwind_info_type ia64_debug_unwind_info (void);
282 static struct bundle_state *get_free_bundle_state (void);
283 static void free_bundle_state (struct bundle_state *);
284 static void initiate_bundle_states (void);
285 static void finish_bundle_states (void);
286 static int insert_bundle_state (struct bundle_state *);
287 static void initiate_bundle_state_table (void);
288 static void finish_bundle_state_table (void);
289 static int try_issue_nops (struct bundle_state *, int);
290 static int try_issue_insn (struct bundle_state *, rtx);
291 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
292 int, int);
293 static int get_max_pos (state_t);
294 static int get_template (state_t, int);
296 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
297 static bool important_for_bundling_p (rtx_insn *);
298 static bool unknown_for_bundling_p (rtx_insn *);
299 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
301 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
302 HOST_WIDE_INT, tree);
303 static void ia64_file_start (void);
304 static void ia64_globalize_decl_name (FILE *, tree);
306 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
307 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
308 static section *ia64_select_rtx_section (machine_mode, rtx,
309 unsigned HOST_WIDE_INT);
310 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
311 ATTRIBUTE_UNUSED;
312 static unsigned int ia64_section_type_flags (tree, const char *, int);
313 static void ia64_init_libfuncs (void)
314 ATTRIBUTE_UNUSED;
315 static void ia64_hpux_init_libfuncs (void)
316 ATTRIBUTE_UNUSED;
317 static void ia64_sysv4_init_libfuncs (void)
318 ATTRIBUTE_UNUSED;
319 static void ia64_vms_init_libfuncs (void)
320 ATTRIBUTE_UNUSED;
321 static void ia64_soft_fp_init_libfuncs (void)
322 ATTRIBUTE_UNUSED;
323 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
324 ATTRIBUTE_UNUSED;
325 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
326 ATTRIBUTE_UNUSED;
328 static bool ia64_attribute_takes_identifier_p (const_tree);
329 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
330 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
331 static void ia64_encode_section_info (tree, rtx, int);
332 static rtx ia64_struct_value_rtx (tree, int);
333 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
334 static bool ia64_scalar_mode_supported_p (machine_mode mode);
335 static bool ia64_vector_mode_supported_p (machine_mode mode);
336 static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
337 static bool ia64_legitimate_constant_p (machine_mode, rtx);
338 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
339 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
340 static const char *ia64_mangle_type (const_tree);
341 static const char *ia64_invalid_conversion (const_tree, const_tree);
342 static const char *ia64_invalid_unary_op (int, const_tree);
343 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
344 static machine_mode ia64_c_mode_for_suffix (char);
345 static void ia64_trampoline_init (rtx, tree, rtx);
346 static void ia64_override_options_after_change (void);
347 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
349 static tree ia64_builtin_decl (unsigned, bool);
351 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
352 static machine_mode ia64_get_reg_raw_mode (int regno);
353 static section * ia64_hpux_function_section (tree, enum node_frequency,
354 bool, bool);
356 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
357 const unsigned char *sel);
359 #define MAX_VECT_LEN 8
361 struct expand_vec_perm_d
363 rtx target, op0, op1;
364 unsigned char perm[MAX_VECT_LEN];
365 machine_mode vmode;
366 unsigned char nelt;
367 bool one_operand_p;
368 bool testing_p;
371 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
374 /* Table of valid machine attributes. */
375 static const struct attribute_spec ia64_attribute_table[] =
377 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
378 affects_type_identity } */
379 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
380 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
381 false },
382 #if TARGET_ABI_OPEN_VMS
383 { "common_object", 1, 1, true, false, false,
384 ia64_vms_common_object_attribute, false },
385 #endif
386 { "version_id", 1, 1, true, false, false,
387 ia64_handle_version_id_attribute, false },
388 { NULL, 0, 0, false, false, false, NULL, false }
391 /* Initialize the GCC target structure. */
392 #undef TARGET_ATTRIBUTE_TABLE
393 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
395 #undef TARGET_INIT_BUILTINS
396 #define TARGET_INIT_BUILTINS ia64_init_builtins
398 #undef TARGET_EXPAND_BUILTIN
399 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
401 #undef TARGET_BUILTIN_DECL
402 #define TARGET_BUILTIN_DECL ia64_builtin_decl
404 #undef TARGET_ASM_BYTE_OP
405 #define TARGET_ASM_BYTE_OP "\tdata1\t"
406 #undef TARGET_ASM_ALIGNED_HI_OP
407 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
408 #undef TARGET_ASM_ALIGNED_SI_OP
409 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
410 #undef TARGET_ASM_ALIGNED_DI_OP
411 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
412 #undef TARGET_ASM_UNALIGNED_HI_OP
413 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
414 #undef TARGET_ASM_UNALIGNED_SI_OP
415 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
416 #undef TARGET_ASM_UNALIGNED_DI_OP
417 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
418 #undef TARGET_ASM_INTEGER
419 #define TARGET_ASM_INTEGER ia64_assemble_integer
421 #undef TARGET_OPTION_OVERRIDE
422 #define TARGET_OPTION_OVERRIDE ia64_option_override
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
426 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
427 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
428 #undef TARGET_ASM_FUNCTION_EPILOGUE
429 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
431 #undef TARGET_PRINT_OPERAND
432 #define TARGET_PRINT_OPERAND ia64_print_operand
433 #undef TARGET_PRINT_OPERAND_ADDRESS
434 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
435 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
436 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
438 #undef TARGET_IN_SMALL_DATA_P
439 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
441 #undef TARGET_SCHED_ADJUST_COST_2
442 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
443 #undef TARGET_SCHED_ISSUE_RATE
444 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
445 #undef TARGET_SCHED_VARIABLE_ISSUE
446 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
447 #undef TARGET_SCHED_INIT
448 #define TARGET_SCHED_INIT ia64_sched_init
449 #undef TARGET_SCHED_FINISH
450 #define TARGET_SCHED_FINISH ia64_sched_finish
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
453 #undef TARGET_SCHED_FINISH_GLOBAL
454 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
455 #undef TARGET_SCHED_REORDER
456 #define TARGET_SCHED_REORDER ia64_sched_reorder
457 #undef TARGET_SCHED_REORDER2
458 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
460 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
461 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
463 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
464 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
466 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
467 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
468 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
469 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
471 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
472 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
473 ia64_first_cycle_multipass_dfa_lookahead_guard
475 #undef TARGET_SCHED_DFA_NEW_CYCLE
476 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
478 #undef TARGET_SCHED_H_I_D_EXTENDED
479 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
481 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
482 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
484 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
485 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
487 #undef TARGET_SCHED_SET_SCHED_CONTEXT
488 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
490 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
491 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
493 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
494 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
496 #undef TARGET_SCHED_SET_SCHED_FLAGS
497 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
499 #undef TARGET_SCHED_GET_INSN_SPEC_DS
500 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
502 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
503 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
505 #undef TARGET_SCHED_SPECULATE_INSN
506 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
508 #undef TARGET_SCHED_NEEDS_BLOCK_P
509 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
511 #undef TARGET_SCHED_GEN_SPEC_CHECK
512 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
514 #undef TARGET_SCHED_SKIP_RTX_P
515 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
517 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
518 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
519 #undef TARGET_ARG_PARTIAL_BYTES
520 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
521 #undef TARGET_FUNCTION_ARG
522 #define TARGET_FUNCTION_ARG ia64_function_arg
523 #undef TARGET_FUNCTION_INCOMING_ARG
524 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
525 #undef TARGET_FUNCTION_ARG_ADVANCE
526 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
527 #undef TARGET_FUNCTION_ARG_BOUNDARY
528 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
530 #undef TARGET_ASM_OUTPUT_MI_THUNK
531 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
532 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
533 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
535 #undef TARGET_ASM_FILE_START
536 #define TARGET_ASM_FILE_START ia64_file_start
538 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
539 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
541 #undef TARGET_REGISTER_MOVE_COST
542 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
543 #undef TARGET_MEMORY_MOVE_COST
544 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
545 #undef TARGET_RTX_COSTS
546 #define TARGET_RTX_COSTS ia64_rtx_costs
547 #undef TARGET_ADDRESS_COST
548 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
550 #undef TARGET_UNSPEC_MAY_TRAP_P
551 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
553 #undef TARGET_MACHINE_DEPENDENT_REORG
554 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
556 #undef TARGET_ENCODE_SECTION_INFO
557 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
559 #undef TARGET_SECTION_TYPE_FLAGS
560 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
562 #ifdef HAVE_AS_TLS
563 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
564 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
565 #endif
567 /* ??? Investigate. */
568 #if 0
569 #undef TARGET_PROMOTE_PROTOTYPES
570 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
571 #endif
573 #undef TARGET_FUNCTION_VALUE
574 #define TARGET_FUNCTION_VALUE ia64_function_value
575 #undef TARGET_LIBCALL_VALUE
576 #define TARGET_LIBCALL_VALUE ia64_libcall_value
577 #undef TARGET_FUNCTION_VALUE_REGNO_P
578 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
580 #undef TARGET_STRUCT_VALUE_RTX
581 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
582 #undef TARGET_RETURN_IN_MEMORY
583 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
584 #undef TARGET_SETUP_INCOMING_VARARGS
585 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
586 #undef TARGET_STRICT_ARGUMENT_NAMING
587 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
588 #undef TARGET_MUST_PASS_IN_STACK
589 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
590 #undef TARGET_GET_RAW_RESULT_MODE
591 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
592 #undef TARGET_GET_RAW_ARG_MODE
593 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
595 #undef TARGET_MEMBER_TYPE_FORCES_BLK
596 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
601 #undef TARGET_ASM_UNWIND_EMIT
602 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
603 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
604 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
605 #undef TARGET_ASM_INIT_SECTIONS
606 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
608 #undef TARGET_DEBUG_UNWIND_INFO
609 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
611 #undef TARGET_SCALAR_MODE_SUPPORTED_P
612 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
613 #undef TARGET_VECTOR_MODE_SUPPORTED_P
614 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
616 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
617 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
618 ia64_libgcc_floating_mode_supported_p
620 #undef TARGET_LEGITIMATE_CONSTANT_P
621 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
622 #undef TARGET_LEGITIMATE_ADDRESS_P
623 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
625 #undef TARGET_CANNOT_FORCE_CONST_MEM
626 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
628 #undef TARGET_MANGLE_TYPE
629 #define TARGET_MANGLE_TYPE ia64_mangle_type
631 #undef TARGET_INVALID_CONVERSION
632 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
633 #undef TARGET_INVALID_UNARY_OP
634 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
635 #undef TARGET_INVALID_BINARY_OP
636 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
638 #undef TARGET_C_MODE_FOR_SUFFIX
639 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
641 #undef TARGET_CAN_ELIMINATE
642 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
644 #undef TARGET_TRAMPOLINE_INIT
645 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
647 #undef TARGET_CAN_USE_DOLOOP_P
648 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
649 #undef TARGET_INVALID_WITHIN_DOLOOP
650 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
652 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
653 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
655 #undef TARGET_PREFERRED_RELOAD_CLASS
656 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
658 #undef TARGET_DELAY_SCHED2
659 #define TARGET_DELAY_SCHED2 true
661 /* Variable tracking should be run after all optimizations which
662 change order of insns. It also needs a valid CFG. */
663 #undef TARGET_DELAY_VARTRACK
664 #define TARGET_DELAY_VARTRACK true
666 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
667 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
669 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
670 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
672 struct gcc_target targetm = TARGET_INITIALIZER;
674 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
675 identifier as an argument, so the front end shouldn't look it up. */
677 static bool
678 ia64_attribute_takes_identifier_p (const_tree attr_id)
680 if (is_attribute_p ("model", attr_id))
681 return true;
682 #if TARGET_ABI_OPEN_VMS
683 if (is_attribute_p ("common_object", attr_id))
684 return true;
685 #endif
686 return false;
689 typedef enum
691 ADDR_AREA_NORMAL, /* normal address area */
692 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
694 ia64_addr_area;
696 static GTY(()) tree small_ident1;
697 static GTY(()) tree small_ident2;
699 static void
700 init_idents (void)
702 if (small_ident1 == 0)
704 small_ident1 = get_identifier ("small");
705 small_ident2 = get_identifier ("__small__");
709 /* Retrieve the address area that has been chosen for the given decl. */
711 static ia64_addr_area
712 ia64_get_addr_area (tree decl)
714 tree model_attr;
716 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
717 if (model_attr)
719 tree id;
721 init_idents ();
722 id = TREE_VALUE (TREE_VALUE (model_attr));
723 if (id == small_ident1 || id == small_ident2)
724 return ADDR_AREA_SMALL;
726 return ADDR_AREA_NORMAL;
729 static tree
730 ia64_handle_model_attribute (tree *node, tree name, tree args,
731 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
733 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
734 ia64_addr_area area;
735 tree arg, decl = *node;
737 init_idents ();
738 arg = TREE_VALUE (args);
739 if (arg == small_ident1 || arg == small_ident2)
741 addr_area = ADDR_AREA_SMALL;
743 else
745 warning (OPT_Wattributes, "invalid argument of %qE attribute",
746 name);
747 *no_add_attrs = true;
750 switch (TREE_CODE (decl))
752 case VAR_DECL:
753 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
754 == FUNCTION_DECL)
755 && !TREE_STATIC (decl))
757 error_at (DECL_SOURCE_LOCATION (decl),
758 "an address area attribute cannot be specified for "
759 "local variables");
760 *no_add_attrs = true;
762 area = ia64_get_addr_area (decl);
763 if (area != ADDR_AREA_NORMAL && addr_area != area)
765 error ("address area of %q+D conflicts with previous "
766 "declaration", decl);
767 *no_add_attrs = true;
769 break;
771 case FUNCTION_DECL:
772 error_at (DECL_SOURCE_LOCATION (decl),
773 "address area attribute cannot be specified for "
774 "functions");
775 *no_add_attrs = true;
776 break;
778 default:
779 warning (OPT_Wattributes, "%qE attribute ignored",
780 name);
781 *no_add_attrs = true;
782 break;
785 return NULL_TREE;
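/* Editorial sketch (not from the original source): user code selects the
   small address area with the documented "model" attribute, e.g.

       static int counter __attribute__ ((model ("small")));

   which places COUNTER in ADDR_AREA_SMALL so its address can be formed
   with a single addl, per the -2MB..+2MB range noted above.  */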
788 /* Part of the low level implementation of DEC Ada pragma Common_Object which
789 enables the shared use of variables stored in overlaid linker areas
790 corresponding to the use of Fortran COMMON. */
792 static tree
793 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
794 int flags ATTRIBUTE_UNUSED,
795 bool *no_add_attrs)
797 tree decl = *node;
798 tree id;
800 gcc_assert (DECL_P (decl));
802 DECL_COMMON (decl) = 1;
803 id = TREE_VALUE (args);
804 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
806 error ("%qE attribute requires a string constant argument", name);
807 *no_add_attrs = true;
808 return NULL_TREE;
810 return NULL_TREE;
813 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
815 void
816 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
817 unsigned HOST_WIDE_INT size,
818 unsigned int align)
820 tree attr = DECL_ATTRIBUTES (decl);
822 if (attr)
823 attr = lookup_attribute ("common_object", attr);
824 if (attr)
826 tree id = TREE_VALUE (TREE_VALUE (attr));
827 const char *name;
829 if (TREE_CODE (id) == IDENTIFIER_NODE)
830 name = IDENTIFIER_POINTER (id);
831 else if (TREE_CODE (id) == STRING_CST)
832 name = TREE_STRING_POINTER (id);
833 else
834 abort ();
836 fprintf (file, "\t.vms_common\t\"%s\",", name);
838 else
839 fprintf (file, "%s", COMMON_ASM_OP);
841 /* Code from elfos.h. */
842 assemble_name (file, name);
843 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
844 size, align / BITS_PER_UNIT);
846 fputc ('\n', file);
849 static void
850 ia64_encode_addr_area (tree decl, rtx symbol)
852 int flags;
854 flags = SYMBOL_REF_FLAGS (symbol);
855 switch (ia64_get_addr_area (decl))
857 case ADDR_AREA_NORMAL: break;
858 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
859 default: gcc_unreachable ();
861 SYMBOL_REF_FLAGS (symbol) = flags;
864 static void
865 ia64_encode_section_info (tree decl, rtx rtl, int first)
867 default_encode_section_info (decl, rtl, first);
869 /* Careful not to prod global register variables. */
870 if (TREE_CODE (decl) == VAR_DECL
871 && GET_CODE (DECL_RTL (decl)) == MEM
872 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
873 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
874 ia64_encode_addr_area (decl, XEXP (rtl, 0));
877 /* Return 1 if the operands of a move are ok. */
880 ia64_move_ok (rtx dst, rtx src)
882 /* If we're under init_recog_no_volatile, we'll not be able to use
883 memory_operand. So check the code directly and don't worry about
884 the validity of the underlying address, which should have been
885 checked elsewhere anyway. */
886 if (GET_CODE (dst) != MEM)
887 return 1;
888 if (GET_CODE (src) == MEM)
889 return 0;
890 if (register_operand (src, VOIDmode))
891 return 1;
 893   /* Otherwise, this must be a constant, and then it must be 0, 0.0 or 1.0. */
894 if (INTEGRAL_MODE_P (GET_MODE (dst)))
895 return src == const0_rtx;
896 else
897 return satisfies_constraint_G (src);
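/* Editorial sketch (not from the original source): under the checks above,
   a register or an integer zero may be stored directly to memory, while
   mem-to-mem copies and stores of other constants are rejected; the expander
   (see ia64_expand_move below) then forces such sources into a register
   first.  For non-integer modes the only constants allowed into memory are
   those satisfying constraint G (0.0 or 1.0, per the comment above).  */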
900 /* Return 1 if the operands are ok for a floating point load pair. */
903 ia64_load_pair_ok (rtx dst, rtx src)
905 /* ??? There is a thinko in the implementation of the "x" constraint and the
906 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
907 also return false for it. */
908 if (GET_CODE (dst) != REG
909 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
910 return 0;
911 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
912 return 0;
913 switch (GET_CODE (XEXP (src, 0)))
915 case REG:
916 case POST_INC:
917 break;
918 case POST_DEC:
919 return 0;
920 case POST_MODIFY:
922 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
924 if (GET_CODE (adjust) != CONST_INT
925 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
926 return 0;
928 break;
929 default:
930 abort ();
932 return 1;
936 addp4_optimize_ok (rtx op1, rtx op2)
938 return (basereg_operand (op1, GET_MODE(op1)) !=
939 basereg_operand (op2, GET_MODE(op2)));
942 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
943 Return the length of the field, or <= 0 on failure. */
946 ia64_depz_field_mask (rtx rop, rtx rshift)
948 unsigned HOST_WIDE_INT op = INTVAL (rop);
949 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
951 /* Get rid of the zero bits we're shifting in. */
952 op >>= shift;
954 /* We must now have a solid block of 1's at bit 0. */
955 return exact_log2 (op + 1);
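/* Editorial worked example (not from the original source): with
   rop = 0xff00 and rshift = 8, op >>= shift leaves 0xff, and
   exact_log2 (0xff + 1) = 8, the width of the deposited field.  With
   rop = 0x0101 and rshift = 0 the value is not a solid block of
   low-order ones, 0x0101 + 1 is not a power of two, and exact_log2
   returns -1 to signal failure.  */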
958 /* Return the TLS model to use for ADDR. */
960 static enum tls_model
961 tls_symbolic_operand_type (rtx addr)
963 enum tls_model tls_kind = TLS_MODEL_NONE;
965 if (GET_CODE (addr) == CONST)
967 if (GET_CODE (XEXP (addr, 0)) == PLUS
968 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
969 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
971 else if (GET_CODE (addr) == SYMBOL_REF)
972 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
974 return tls_kind;
977 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
978 as a base register. */
980 static inline bool
981 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
983 if (strict
984 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
985 return true;
986 else if (!strict
987 && (GENERAL_REGNO_P (REGNO (reg))
988 || !HARD_REGISTER_P (reg)))
989 return true;
990 else
991 return false;
994 static bool
995 ia64_legitimate_address_reg (const_rtx reg, bool strict)
997 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
998 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
999 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1000 return true;
1002 return false;
1005 static bool
1006 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1008 if (GET_CODE (disp) == PLUS
1009 && rtx_equal_p (reg, XEXP (disp, 0))
1010 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1011 || (CONST_INT_P (XEXP (disp, 1))
1012 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1013 return true;
1015 return false;
1018 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1020 static bool
1021 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1022 rtx x, bool strict)
1024 if (ia64_legitimate_address_reg (x, strict))
1025 return true;
1026 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1027 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1028 && XEXP (x, 0) != arg_pointer_rtx)
1029 return true;
1030 else if (GET_CODE (x) == POST_MODIFY
1031 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1032 && XEXP (x, 0) != arg_pointer_rtx
1033 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1034 return true;
1035 else
1036 return false;
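/* Editorial sketch (not from the original source): the addresses accepted
   above are a plain base register such as [r14] (r14 chosen only for
   illustration), a post-increment or post-decrement of a base register,
   and a post-modify whose displacement is either another base register or
   a constant in the signed 9-bit range -256..255.  Base-plus-offset and
   scaled-index forms are not memory addresses on ia64 and must be computed
   into a register first.  */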
1039 /* Return true if X is a constant that is valid for some immediate
1040 field in an instruction. */
1042 static bool
1043 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1045 switch (GET_CODE (x))
1047 case CONST_INT:
1048 case LABEL_REF:
1049 return true;
1051 case CONST_DOUBLE:
1052 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1053 return true;
1054 return satisfies_constraint_G (x);
1056 case CONST:
1057 case SYMBOL_REF:
1058 /* ??? Short term workaround for PR 28490. We must make the code here
1059 match the code in ia64_expand_move and move_operand, even though they
1060 are both technically wrong. */
1061 if (tls_symbolic_operand_type (x) == 0)
1063 HOST_WIDE_INT addend = 0;
1064 rtx op = x;
1066 if (GET_CODE (op) == CONST
1067 && GET_CODE (XEXP (op, 0)) == PLUS
1068 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1070 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1071 op = XEXP (XEXP (op, 0), 0);
1074 if (any_offset_symbol_operand (op, mode)
1075 || function_operand (op, mode))
1076 return true;
1077 if (aligned_offset_symbol_operand (op, mode))
1078 return (addend & 0x3fff) == 0;
1079 return false;
1081 return false;
1083 case CONST_VECTOR:
1084 if (mode == V2SFmode)
1085 return satisfies_constraint_Y (x);
1087 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1088 && GET_MODE_SIZE (mode) <= 8);
1090 default:
1091 return false;
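/* Editorial worked example (not from the original source): for an
   aligned_offset_symbol_operand, only 16KB-aligned addends survive the
   (addend & 0x3fff) == 0 test above, so sym+0x8000 is a legitimate
   constant while sym+0x2001 is not and has to be rebuilt at expand time
   from the symbol plus a separate addition.  */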
1095 /* Don't allow TLS addresses to get spilled to memory. */
1097 static bool
1098 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1100 if (mode == RFmode)
1101 return true;
1102 return tls_symbolic_operand_type (x) != 0;
1105 /* Expand a symbolic constant load. */
1107 bool
1108 ia64_expand_load_address (rtx dest, rtx src)
1110 gcc_assert (GET_CODE (dest) == REG);
1112 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1113 having to pointer-extend the value afterward. Other forms of address
1114 computation below are also more natural to compute as 64-bit quantities.
1115 If we've been given an SImode destination register, change it. */
1116 if (GET_MODE (dest) != Pmode)
1117 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1118 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1120 if (TARGET_NO_PIC)
1121 return false;
1122 if (small_addr_symbolic_operand (src, VOIDmode))
1123 return false;
1125 if (TARGET_AUTO_PIC)
1126 emit_insn (gen_load_gprel64 (dest, src));
1127 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1128 emit_insn (gen_load_fptr (dest, src));
1129 else if (sdata_symbolic_operand (src, VOIDmode))
1130 emit_insn (gen_load_gprel (dest, src));
1131 else
1133 HOST_WIDE_INT addend = 0;
1134 rtx tmp;
1136 /* We did split constant offsets in ia64_expand_move, and we did try
1137 to keep them split in move_operand, but we also allowed reload to
1138 rematerialize arbitrary constants rather than spill the value to
1139 the stack and reload it. So we have to be prepared here to split
1140 them apart again. */
1141 if (GET_CODE (src) == CONST)
1143 HOST_WIDE_INT hi, lo;
1145 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1146 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1147 hi = hi - lo;
1149 if (lo != 0)
1151 addend = lo;
1152 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1156 tmp = gen_rtx_HIGH (Pmode, src);
1157 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1158 emit_insn (gen_rtx_SET (dest, tmp));
1160 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1161 emit_insn (gen_rtx_SET (dest, tmp));
1163 if (addend)
1165 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1166 emit_insn (gen_rtx_SET (dest, tmp));
1170 return true;
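/* Editorial worked example (not from the original source): the hi/lo split
   above sign-extends the low 14 bits of the offset.  For an incoming
   offset of 0x12345: 0x12345 & 0x3fff = 0x2345, ^ 0x2000 gives 0x0345,
   minus 0x2000 yields lo = -0x1cbb, and hi = 0x12345 - lo = 0x14000,
   which is 16KB aligned and safe to fold into the relocation, while lo is
   re-added with the explicit add emitted at the end.  */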
1173 static GTY(()) rtx gen_tls_tga;
1174 static rtx
1175 gen_tls_get_addr (void)
1177 if (!gen_tls_tga)
1178 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1179 return gen_tls_tga;
1182 static GTY(()) rtx thread_pointer_rtx;
1183 static rtx
1184 gen_thread_pointer (void)
1186 if (!thread_pointer_rtx)
1187 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1188 return thread_pointer_rtx;
1191 static rtx
1192 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1193 rtx orig_op1, HOST_WIDE_INT addend)
1195 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1196 rtx_insn *insns;
1197 rtx orig_op0 = op0;
1198 HOST_WIDE_INT addend_lo, addend_hi;
1200 switch (tls_kind)
1202 case TLS_MODEL_GLOBAL_DYNAMIC:
1203 start_sequence ();
1205 tga_op1 = gen_reg_rtx (Pmode);
1206 emit_insn (gen_load_dtpmod (tga_op1, op1));
1208 tga_op2 = gen_reg_rtx (Pmode);
1209 emit_insn (gen_load_dtprel (tga_op2, op1));
1211 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1212 LCT_CONST, Pmode, 2, tga_op1,
1213 Pmode, tga_op2, Pmode);
1215 insns = get_insns ();
1216 end_sequence ();
1218 if (GET_MODE (op0) != Pmode)
1219 op0 = tga_ret;
1220 emit_libcall_block (insns, op0, tga_ret, op1);
1221 break;
1223 case TLS_MODEL_LOCAL_DYNAMIC:
1224     case TLS_MODEL_LOCAL_DYNAMIC:
1224       /* ??? This isn't the completely proper way to do local-dynamic.
1225 If the call to __tls_get_addr is used only by a single symbol,
1226 then we should (somehow) move the dtprel to the second arg
1227 to avoid the extra add. */
1228 start_sequence ();
1230 tga_op1 = gen_reg_rtx (Pmode);
1231 emit_insn (gen_load_dtpmod (tga_op1, op1));
1233 tga_op2 = const0_rtx;
1235 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1236 LCT_CONST, Pmode, 2, tga_op1,
1237 Pmode, tga_op2, Pmode);
1239 insns = get_insns ();
1240 end_sequence ();
1242 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1243 UNSPEC_LD_BASE);
1244 tmp = gen_reg_rtx (Pmode);
1245 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1247 if (!register_operand (op0, Pmode))
1248 op0 = gen_reg_rtx (Pmode);
1249 if (TARGET_TLS64)
1251 emit_insn (gen_load_dtprel (op0, op1));
1252 emit_insn (gen_adddi3 (op0, tmp, op0));
1254 else
1255 emit_insn (gen_add_dtprel (op0, op1, tmp));
1256 break;
1258 case TLS_MODEL_INITIAL_EXEC:
1259 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1260 addend_hi = addend - addend_lo;
1262 op1 = plus_constant (Pmode, op1, addend_hi);
1263 addend = addend_lo;
1265 tmp = gen_reg_rtx (Pmode);
1266 emit_insn (gen_load_tprel (tmp, op1));
1268 if (!register_operand (op0, Pmode))
1269 op0 = gen_reg_rtx (Pmode);
1270 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1271 break;
1273 case TLS_MODEL_LOCAL_EXEC:
1274 if (!register_operand (op0, Pmode))
1275 op0 = gen_reg_rtx (Pmode);
1277 op1 = orig_op1;
1278 addend = 0;
1279 if (TARGET_TLS64)
1281 emit_insn (gen_load_tprel (op0, op1));
1282 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1284 else
1285 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1286 break;
1288 default:
1289 gcc_unreachable ();
1292 if (addend)
1293 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1294 orig_op0, 1, OPTAB_DIRECT);
1295 if (orig_op0 == op0)
1296 return NULL_RTX;
1297 if (GET_MODE (orig_op0) == Pmode)
1298 return op0;
1299 return gen_lowpart (GET_MODE (orig_op0), op0);
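/* Editorial summary sketch (not from the original source) of the code
   emitted above per TLS model:
     global-dynamic: load dtpmod and dtprel of the symbol, then call
       __tls_get_addr (dtpmod, dtprel);
     local-dynamic:  call __tls_get_addr (dtpmod, 0) once for the module
       base, then add the symbol's dtprel offset;
     initial-exec:   load the symbol's tprel and add the thread pointer
       (r13, see gen_thread_pointer);
     local-exec:     add the tprel offset to the thread pointer directly.  */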
1303 ia64_expand_move (rtx op0, rtx op1)
1305 machine_mode mode = GET_MODE (op0);
1307 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1308 op1 = force_reg (mode, op1);
1310 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1312 HOST_WIDE_INT addend = 0;
1313 enum tls_model tls_kind;
1314 rtx sym = op1;
1316 if (GET_CODE (op1) == CONST
1317 && GET_CODE (XEXP (op1, 0)) == PLUS
1318 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1320 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1321 sym = XEXP (XEXP (op1, 0), 0);
1324 tls_kind = tls_symbolic_operand_type (sym);
1325 if (tls_kind)
1326 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1328 if (any_offset_symbol_operand (sym, mode))
1329 addend = 0;
1330 else if (aligned_offset_symbol_operand (sym, mode))
1332 HOST_WIDE_INT addend_lo, addend_hi;
1334 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1335 addend_hi = addend - addend_lo;
1337 if (addend_lo != 0)
1339 op1 = plus_constant (mode, sym, addend_hi);
1340 addend = addend_lo;
1342 else
1343 addend = 0;
1345 else
1346 op1 = sym;
1348 if (reload_completed)
1350 /* We really should have taken care of this offset earlier. */
1351 gcc_assert (addend == 0);
1352 if (ia64_expand_load_address (op0, op1))
1353 return NULL_RTX;
1356 if (addend)
1358 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1360 emit_insn (gen_rtx_SET (subtarget, op1));
1362 op1 = expand_simple_binop (mode, PLUS, subtarget,
1363 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1364 if (op0 == op1)
1365 return NULL_RTX;
1369 return op1;
1372 /* Split a move from OP1 to OP0 conditional on COND. */
1374 void
1375 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1377 rtx_insn *insn, *first = get_last_insn ();
1379 emit_move_insn (op0, op1);
1381 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1382 if (INSN_P (insn))
1383 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1384 PATTERN (insn));
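/* Editorial sketch (not from the original source): after the loop above, a
   move that expanded to (set (reg r14) (reg r15)) becomes
     (cond_exec (ne (reg:BI p6) (const_int 0))
                (set (reg r14) (reg r15)))
   assuming COND was the predicate test (ne p6 0); r14, r15 and p6 are
   illustrative only.  Every insn emitted by emit_move_insn receives the
   same predicate.  */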
1387 /* Split a post-reload TImode or TFmode reference into two DImode
1388 components. This is made extra difficult by the fact that we do
1389 not get any scratch registers to work with, because reload cannot
1390 be prevented from giving us a scratch that overlaps the register
1391 pair involved. So instead, when addressing memory, we tweak the
1392 pointer register up and back down with POST_INCs. Or up and not
1393 back down when we can get away with it.
1395 REVERSED is true when the loads must be done in reversed order
1396 (high word first) for correctness. DEAD is true when the pointer
1397 dies with the second insn we generate and therefore the second
1398 address must not carry a postmodify.
1400 May return an insn which is to be emitted after the moves. */
1402 static rtx
1403 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1405 rtx fixup = 0;
1407 switch (GET_CODE (in))
1409 case REG:
1410 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1411 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1412 break;
1414 case CONST_INT:
1415 case CONST_DOUBLE:
1416 /* Cannot occur reversed. */
1417 gcc_assert (!reversed);
1419 if (GET_MODE (in) != TFmode)
1420 split_double (in, &out[0], &out[1]);
1421 else
1422 /* split_double does not understand how to split a TFmode
1423 quantity into a pair of DImode constants. */
1425 REAL_VALUE_TYPE r;
1426 unsigned HOST_WIDE_INT p[2];
1427 long l[4]; /* TFmode is 128 bits */
1429 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1430 real_to_target (l, &r, TFmode);
1432 if (FLOAT_WORDS_BIG_ENDIAN)
1434 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1435 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1437 else
1439 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1440 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1442 out[0] = GEN_INT (p[0]);
1443 out[1] = GEN_INT (p[1]);
1445 break;
1447 case MEM:
1449 rtx base = XEXP (in, 0);
1450 rtx offset;
1452 switch (GET_CODE (base))
1454 case REG:
1455 if (!reversed)
1457 out[0] = adjust_automodify_address
1458 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1459 out[1] = adjust_automodify_address
1460 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1462 else
1464 /* Reversal requires a pre-increment, which can only
1465 be done as a separate insn. */
1466 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1467 out[0] = adjust_automodify_address
1468 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1469 out[1] = adjust_address (in, DImode, 0);
1471 break;
1473 case POST_INC:
1474 gcc_assert (!reversed && !dead);
1476 /* Just do the increment in two steps. */
1477 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1478 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1479 break;
1481 case POST_DEC:
1482 gcc_assert (!reversed && !dead);
1484 /* Add 8, subtract 24. */
1485 base = XEXP (base, 0);
1486 out[0] = adjust_automodify_address
1487 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1488 out[1] = adjust_automodify_address
1489 (in, DImode,
1490 gen_rtx_POST_MODIFY (Pmode, base,
1491 plus_constant (Pmode, base, -24)),
1493 break;
1495 case POST_MODIFY:
1496 gcc_assert (!reversed && !dead);
1498 /* Extract and adjust the modification. This case is
1499 trickier than the others, because we might have an
1500 index register, or we might have a combined offset that
1501 doesn't fit a signed 9-bit displacement field. We can
1502 assume the incoming expression is already legitimate. */
1503 offset = XEXP (base, 1);
1504 base = XEXP (base, 0);
1506 out[0] = adjust_automodify_address
1507 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1509 if (GET_CODE (XEXP (offset, 1)) == REG)
1511 /* Can't adjust the postmodify to match. Emit the
1512 original, then a separate addition insn. */
1513 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1514 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1516 else
1518 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1519 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1521 /* Again the postmodify cannot be made to match,
1522 but in this case it's more efficient to get rid
1523 of the postmodify entirely and fix up with an
1524 add insn. */
1525 out[1] = adjust_automodify_address (in, DImode, base, 8);
1526 fixup = gen_adddi3
1527 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1529 else
1531 /* Combined offset still fits in the displacement field.
1532 (We cannot overflow it at the high end.) */
1533 out[1] = adjust_automodify_address
1534 (in, DImode, gen_rtx_POST_MODIFY
1535 (Pmode, base, gen_rtx_PLUS
1536 (Pmode, base,
1537 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1541 break;
1543 default:
1544 gcc_unreachable ();
1546 break;
1549 default:
1550 gcc_unreachable ();
1553 return fixup;
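/* Editorial worked example (not from the original source): splitting a
   non-reversed TImode load from a live base register, say r14, yields
     out[0] = [r14], post-increment r14 by 8
     out[1] = [r14], post-decrement r14 by 8
   so the second access reads offset 8 and the base ends up back at its
   original value; when DEAD is set the post-decrement is omitted.  */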
1556 /* Split a TImode or TFmode move instruction after reload.
1557 This is used by *movtf_internal and *movti_internal. */
1558 void
1559 ia64_split_tmode_move (rtx operands[])
1561 rtx in[2], out[2], insn;
1562 rtx fixup[2];
1563 bool dead = false;
1564 bool reversed = false;
1566 /* It is possible for reload to decide to overwrite a pointer with
1567 the value it points to. In that case we have to do the loads in
1568 the appropriate order so that the pointer is not destroyed too
1569 early. Also we must not generate a postmodify for that second
1570 load, or rws_access_regno will die. And we must not generate a
1571 postmodify for the second load if the destination register
1572 overlaps with the base register. */
1573 if (GET_CODE (operands[1]) == MEM
1574 && reg_overlap_mentioned_p (operands[0], operands[1]))
1576 rtx base = XEXP (operands[1], 0);
1577 while (GET_CODE (base) != REG)
1578 base = XEXP (base, 0);
1580 if (REGNO (base) == REGNO (operands[0]))
1581 reversed = true;
1583 if (refers_to_regno_p (REGNO (operands[0]),
1584 REGNO (operands[0])+2,
1585 base, 0))
1586 dead = true;
1588 /* Another reason to do the moves in reversed order is if the first
1589 element of the target register pair is also the second element of
1590 the source register pair. */
1591 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1592 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1593 reversed = true;
1595 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1596 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1598 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1599 if (GET_CODE (EXP) == MEM \
1600 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1601 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1602 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1603 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1605 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1606 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1607 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1609 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1610 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1611 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1613 if (fixup[0])
1614 emit_insn (fixup[0]);
1615 if (fixup[1])
1616 emit_insn (fixup[1]);
1618 #undef MAYBE_ADD_REG_INC_NOTE
1621 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1622 through memory plus an extra GR scratch register. Except that you can
1623 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1624 SECONDARY_RELOAD_CLASS, but not both.
1626 We got into problems in the first place by allowing a construct like
1627 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1628 This solution attempts to prevent this situation from occurring. When
1629 we see something like the above, we spill the inner register to memory. */
1631 static rtx
1632 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1634 if (GET_CODE (in) == SUBREG
1635 && GET_MODE (SUBREG_REG (in)) == TImode
1636 && GET_CODE (SUBREG_REG (in)) == REG)
1638 rtx memt = assign_stack_temp (TImode, 16);
1639 emit_move_insn (memt, SUBREG_REG (in));
1640 return adjust_address (memt, mode, 0);
1642 else if (force && GET_CODE (in) == REG)
1644 rtx memx = assign_stack_temp (mode, 16);
1645 emit_move_insn (memx, in);
1646 return memx;
1648 else
1649 return in;
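/* Editorial sketch (not from the original source): the construct described
   above can arise from something like
       union { long double ld; __int128 i; } u;
   when the whole union lives in a TImode pseudo, so accessing u.ld yields
   (subreg:XF (reg:TI ...)); this helper spills the TImode register to a
   stack temporary so the XF value is re-read from memory instead.  */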
1652 /* Expand the movxf or movrf pattern (MODE says which) with the given
1653 OPERANDS, returning true if the pattern should then invoke
1654 DONE. */
1656 bool
1657 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1659 rtx op0 = operands[0];
1661 if (GET_CODE (op0) == SUBREG)
1662 op0 = SUBREG_REG (op0);
1664 /* We must support XFmode loads into general registers for stdarg/vararg,
1665 unprototyped calls, and a rare case where a long double is passed as
1666 an argument after a float HFA fills the FP registers. We split them into
1667 DImode loads for convenience. We also need to support XFmode stores
1668 for the last case. This case does not happen for stdarg/vararg routines,
1669 because we do a block store to memory of unnamed arguments. */
1671 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1673 rtx out[2];
1675 /* We're hoping to transform everything that deals with XFmode
1676 quantities and GR registers early in the compiler. */
1677 gcc_assert (can_create_pseudo_p ());
1679 /* Struct to register can just use TImode instead. */
1680 if ((GET_CODE (operands[1]) == SUBREG
1681 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1682 || (GET_CODE (operands[1]) == REG
1683 && GR_REGNO_P (REGNO (operands[1]))))
1685 rtx op1 = operands[1];
1687 if (GET_CODE (op1) == SUBREG)
1688 op1 = SUBREG_REG (op1);
1689 else
1690 op1 = gen_rtx_REG (TImode, REGNO (op1));
1692 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1693 return true;
1696 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1698 /* Don't word-swap when reading in the constant. */
1699 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1700 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1701 0, mode));
1702 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1703 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1704 0, mode));
1705 return true;
1708 /* If the quantity is in a register not known to be GR, spill it. */
1709 if (register_operand (operands[1], mode))
1710 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1712 gcc_assert (GET_CODE (operands[1]) == MEM);
1714 /* Don't word-swap when reading in the value. */
1715 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1716 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1718 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1719 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1720 return true;
1723 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1725 /* We're hoping to transform everything that deals with XFmode
1726 quantities and GR registers early in the compiler. */
1727 gcc_assert (can_create_pseudo_p ());
1729 /* Op0 can't be a GR_REG here, as that case is handled above.
1730 If op0 is a register, then we spill op1, so that we now have a
1731 MEM operand. This requires creating an XFmode subreg of a TImode reg
1732 to force the spill. */
1733 if (register_operand (operands[0], mode))
1735 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1736 op1 = gen_rtx_SUBREG (mode, op1, 0);
1737 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1740 else
1742 rtx in[2];
1744 gcc_assert (GET_CODE (operands[0]) == MEM);
1746 /* Don't word-swap when writing out the value. */
1747 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1748 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1750 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1751 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1752 return true;
1756 if (!reload_in_progress && !reload_completed)
1758 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1760 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1762 rtx memt, memx, in = operands[1];
1763 if (CONSTANT_P (in))
1764 in = validize_mem (force_const_mem (mode, in));
1765 if (GET_CODE (in) == MEM)
1766 memt = adjust_address (in, TImode, 0);
1767 else
1769 memt = assign_stack_temp (TImode, 16);
1770 memx = adjust_address (memt, mode, 0);
1771 emit_move_insn (memx, in);
1773 emit_move_insn (op0, memt);
1774 return true;
1777 if (!ia64_move_ok (operands[0], operands[1]))
1778 operands[1] = force_reg (mode, operands[1]);
1781 return false;
1784 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1785 with the expression that holds the compare result (in VOIDmode). */
1787 static GTY(()) rtx cmptf_libfunc;
1789 void
1790 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1792 enum rtx_code code = GET_CODE (*expr);
1793 rtx cmp;
1795 /* If we have a BImode input, then we already have a compare result, and
1796 do not need to emit another comparison. */
1797 if (GET_MODE (*op0) == BImode)
1799 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1800 cmp = *op0;
1802 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1803 magic number as its third argument indicating what to do.
1804 The return value is an integer to be compared against zero. */
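/* For example (illustrative only), a LT comparison is expanded below into
   a call _U_Qfcmp (op0, op1, QCMP_LT | QCMP_INV) followed by an NE test
   of the DImode result against zero.  */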
1805 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1807 enum qfcmp_magic {
1808 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1809 QCMP_UNORD = 2,
1810 QCMP_EQ = 4,
1811 QCMP_LT = 8,
1812 QCMP_GT = 16
1814 int magic;
1815 enum rtx_code ncode;
1816 rtx ret, insns;
1818 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1819 switch (code)
1821 /* 1 = equal, 0 = not equal. Equality operators do
1822 not raise FP_INVALID when given a NaN operand. */
1823 case EQ: magic = QCMP_EQ; ncode = NE; break;
1824 case NE: magic = QCMP_EQ; ncode = EQ; break;
1825 /* isunordered() from C99. */
1826 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1827 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1828 /* Relational operators raise FP_INVALID when given
1829 a NaN operand. */
1830 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1831 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1832 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1833 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1834 /* Unordered relational operators do not raise FP_INVALID
1835 when given a NaN operand. */
1836 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1837 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1838 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1839 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1840 /* Not supported. */
1841 case UNEQ:
1842 case LTGT:
1843 default: gcc_unreachable ();
1846 start_sequence ();
1848 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1849 *op0, TFmode, *op1, TFmode,
1850 GEN_INT (magic), DImode);
1851 cmp = gen_reg_rtx (BImode);
1852 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1853 ret, const0_rtx)));
1855 insns = get_insns ();
1856 end_sequence ();
1858 emit_libcall_block (insns, cmp, cmp,
1859 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1860 code = NE;
1862 else
1864 cmp = gen_reg_rtx (BImode);
1865 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1866 code = NE;
1869 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1870 *op0 = cmp;
1871 *op1 = const0_rtx;
1874 /* Generate an integral vector comparison. Return true if the condition has
1875 been reversed, and so the sense of the comparison should be inverted. */
1877 static bool
1878 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1879 rtx dest, rtx op0, rtx op1)
1881 bool negate = false;
1882 rtx x;
1884 /* Canonicalize the comparison to EQ, GT, GTU. */
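/* E.g. (illustrative) LE becomes GT with the result negated, LT becomes
   GT with the operands swapped, and GE is first reversed to LT and then
   falls through to the operand swap.  */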
1885 switch (code)
1887 case EQ:
1888 case GT:
1889 case GTU:
1890 break;
1892 case NE:
1893 case LE:
1894 case LEU:
1895 code = reverse_condition (code);
1896 negate = true;
1897 break;
1899 case GE:
1900 case GEU:
1901 code = reverse_condition (code);
1902 negate = true;
1903 /* FALLTHRU */
1905 case LT:
1906 case LTU:
1907 code = swap_condition (code);
1908 x = op0, op0 = op1, op1 = x;
1909 break;
1911 default:
1912 gcc_unreachable ();
1915 /* Unsigned parallel compare is not supported by the hardware. Play some
1916 tricks to turn this into a signed comparison against 0. */
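/* Sketch of the tricks used below (illustrative):
   - V2SI: subtract 0x80000000 from both operands, which flips their sign
     bits and turns the unsigned x >u y into the signed x >s y.
   - V8QI/V4HI: compute x -us y with unsigned saturation; the result is
     nonzero exactly when x >u y, so test it for EQ against zero and then
     invert the sense of the comparison.  */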
1917 if (code == GTU)
1919 switch (mode)
1921 case V2SImode:
1923 rtx t1, t2, mask;
1925 /* Subtract (-(INT MAX) - 1) from both operands to make
1926 them signed. */
1927 mask = GEN_INT (0x80000000);
1928 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1929 mask = force_reg (mode, mask);
1930 t1 = gen_reg_rtx (mode);
1931 emit_insn (gen_subv2si3 (t1, op0, mask));
1932 t2 = gen_reg_rtx (mode);
1933 emit_insn (gen_subv2si3 (t2, op1, mask));
1934 op0 = t1;
1935 op1 = t2;
1936 code = GT;
1938 break;
1940 case V8QImode:
1941 case V4HImode:
1942 /* Perform a parallel unsigned saturating subtraction. */
1943 x = gen_reg_rtx (mode);
1944 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1946 code = EQ;
1947 op0 = x;
1948 op1 = CONST0_RTX (mode);
1949 negate = !negate;
1950 break;
1952 default:
1953 gcc_unreachable ();
1957 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1958 emit_insn (gen_rtx_SET (dest, x));
1960 return negate;
1963 /* Emit an integral vector conditional move. */
1965 void
1966 ia64_expand_vecint_cmov (rtx operands[])
1968 machine_mode mode = GET_MODE (operands[0]);
1969 enum rtx_code code = GET_CODE (operands[3]);
1970 bool negate;
1971 rtx cmp, x, ot, of;
1973 cmp = gen_reg_rtx (mode);
1974 negate = ia64_expand_vecint_compare (code, mode, cmp,
1975 operands[4], operands[5]);
1977 ot = operands[1+negate];
1978 of = operands[2-negate];
1980 if (ot == CONST0_RTX (mode))
1982 if (of == CONST0_RTX (mode))
1984 emit_move_insn (operands[0], ot);
1985 return;
1988 x = gen_rtx_NOT (mode, cmp);
1989 x = gen_rtx_AND (mode, x, of);
1990 emit_insn (gen_rtx_SET (operands[0], x));
1992 else if (of == CONST0_RTX (mode))
1994 x = gen_rtx_AND (mode, cmp, ot);
1995 emit_insn (gen_rtx_SET (operands[0], x));
1997 else
1999 rtx t, f;
2001 t = gen_reg_rtx (mode);
2002 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2003 emit_insn (gen_rtx_SET (t, x));
2005 f = gen_reg_rtx (mode);
2006 x = gen_rtx_NOT (mode, cmp);
2007 x = gen_rtx_AND (mode, x, operands[2-negate]);
2008 emit_insn (gen_rtx_SET (f, x));
2010 x = gen_rtx_IOR (mode, t, f);
2011 emit_insn (gen_rtx_SET (operands[0], x));
2015 /* Emit an integral vector min or max operation. Return true if all done. */
2017 bool
2018 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2019 rtx operands[])
2021 rtx xops[6];
2023 /* These four combinations are supported directly. */
2024 if (mode == V8QImode && (code == UMIN || code == UMAX))
2025 return false;
2026 if (mode == V4HImode && (code == SMIN || code == SMAX))
2027 return false;
2029 /* This combination can be implemented with only saturating subtraction. */
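/* Illustrative identity: umax (a, b) == (a -us b) + b, since the
   saturating difference a -us b is a - b when a > b and 0 otherwise.  */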
2030 if (mode == V4HImode && code == UMAX)
2032 rtx x, tmp = gen_reg_rtx (mode);
2034 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2035 emit_insn (gen_rtx_SET (tmp, x));
2037 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2038 return true;
2041 /* Everything else implemented via vector comparisons. */
2042 xops[0] = operands[0];
2043 xops[4] = xops[1] = operands[1];
2044 xops[5] = xops[2] = operands[2];
2046 switch (code)
2048 case UMIN:
2049 code = LTU;
2050 break;
2051 case UMAX:
2052 code = GTU;
2053 break;
2054 case SMIN:
2055 code = LT;
2056 break;
2057 case SMAX:
2058 code = GT;
2059 break;
2060 default:
2061 gcc_unreachable ();
2063 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2065 ia64_expand_vecint_cmov (xops);
2066 return true;
2069 /* The vectors LO and HI each contain N halves of a double-wide vector.
2070 Reassemble either the first N/2 or the second N/2 elements. */
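/* Illustrative example for little-endian V8QImode (nelt == 8): the low
   half uses the permutation { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving
   the first four elements of LO with the first four of HI; the high half
   uses { 4, 12, 5, 13, 6, 14, 7, 15 }.  */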
2072 void
2073 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2075 machine_mode vmode = GET_MODE (lo);
2076 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2077 struct expand_vec_perm_d d;
2078 bool ok;
2080 d.target = gen_lowpart (vmode, out);
2081 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2082 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2083 d.vmode = vmode;
2084 d.nelt = nelt;
2085 d.one_operand_p = false;
2086 d.testing_p = false;
2088 high = (highp ? nelt / 2 : 0);
2089 for (i = 0; i < nelt / 2; ++i)
2091 d.perm[i * 2] = i + high;
2092 d.perm[i * 2 + 1] = i + high + nelt;
2095 ok = ia64_expand_vec_perm_const_1 (&d);
2096 gcc_assert (ok);
2099 /* Return a vector of the sign-extension of VEC. */
2101 static rtx
2102 ia64_unpack_sign (rtx vec, bool unsignedp)
2104 machine_mode mode = GET_MODE (vec);
2105 rtx zero = CONST0_RTX (mode);
2107 if (unsignedp)
2108 return zero;
2109 else
2111 rtx sign = gen_reg_rtx (mode);
2112 bool neg;
2114 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2115 gcc_assert (!neg);
2117 return sign;
2121 /* Emit an integral vector unpack operation. */
2123 void
2124 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2126 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2127 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2130 /* Emit an integral vector widening sum operation. */
2132 void
2133 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2135 machine_mode wmode;
2136 rtx l, h, t, sign;
2138 sign = ia64_unpack_sign (operands[1], unsignedp);
2140 wmode = GET_MODE (operands[0]);
2141 l = gen_reg_rtx (wmode);
2142 h = gen_reg_rtx (wmode);
2144 ia64_unpack_assemble (l, operands[1], sign, false);
2145 ia64_unpack_assemble (h, operands[1], sign, true);
2147 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2148 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2149 if (t != operands[0])
2150 emit_move_insn (operands[0], t);
2153 /* Emit the appropriate sequence for a call. */
2155 void
2156 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2157 int sibcall_p)
2159 rtx insn, b0;
2161 addr = XEXP (addr, 0);
2162 addr = convert_memory_address (DImode, addr);
2163 b0 = gen_rtx_REG (DImode, R_BR (0));
2165 /* ??? Should do this for functions known to bind local too. */
2166 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2168 if (sibcall_p)
2169 insn = gen_sibcall_nogp (addr);
2170 else if (! retval)
2171 insn = gen_call_nogp (addr, b0);
2172 else
2173 insn = gen_call_value_nogp (retval, addr, b0);
2174 insn = emit_call_insn (insn);
2176 else
2178 if (sibcall_p)
2179 insn = gen_sibcall_gp (addr);
2180 else if (! retval)
2181 insn = gen_call_gp (addr, b0);
2182 else
2183 insn = gen_call_value_gp (retval, addr, b0);
2184 insn = emit_call_insn (insn);
2186 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2189 if (sibcall_p)
2190 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2192 if (TARGET_ABI_OPEN_VMS)
2193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2194 gen_rtx_REG (DImode, GR_REG (25)));
2197 static void
2198 reg_emitted (enum ia64_frame_regs r)
2200 if (emitted_frame_related_regs[r] == 0)
2201 emitted_frame_related_regs[r] = current_frame_info.r[r];
2202 else
2203 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2206 static int
2207 get_reg (enum ia64_frame_regs r)
2209 reg_emitted (r);
2210 return current_frame_info.r[r];
2213 static bool
2214 is_emitted (int regno)
2216 unsigned int r;
2218 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2219 if (emitted_frame_related_regs[r] == regno)
2220 return true;
2221 return false;
2224 void
2225 ia64_reload_gp (void)
2227 rtx tmp;
2229 if (current_frame_info.r[reg_save_gp])
2231 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2233 else
2235 HOST_WIDE_INT offset;
2236 rtx offset_r;
2238 offset = (current_frame_info.spill_cfa_off
2239 + current_frame_info.spill_size);
2240 if (frame_pointer_needed)
2242 tmp = hard_frame_pointer_rtx;
2243 offset = -offset;
2245 else
2247 tmp = stack_pointer_rtx;
2248 offset = current_frame_info.total_size - offset;
2251 offset_r = GEN_INT (offset);
2252 if (satisfies_constraint_I (offset_r))
2253 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2254 else
2256 emit_move_insn (pic_offset_table_rtx, offset_r);
2257 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2258 pic_offset_table_rtx, tmp));
2261 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2264 emit_move_insn (pic_offset_table_rtx, tmp);
2267 void
2268 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2269 rtx scratch_b, int noreturn_p, int sibcall_p)
2271 rtx insn;
2272 bool is_desc = false;
2274 /* If we find we're calling through a register, then we're actually
2275 calling through a descriptor, so load up the values. */
2276 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2278 rtx tmp;
2279 bool addr_dead_p;
2281 /* ??? We are currently constrained to *not* use peep2, because
2282 we can legitimately change the global lifetime of the GP
2283 (in the form of killing where previously live). This is
2284 because a call through a descriptor doesn't use the previous
2285 value of the GP, while a direct call does, and we do not
2286 commit to either form until the split here.
2288 That said, this means that we lack precise life info for
2289 whether ADDR is dead after this call. This is not terribly
2290 important, since we can fix things up essentially for free
2291 with the POST_DEC below, but it's nice to not use it when we
2292 can immediately tell it's not necessary. */
2293 addr_dead_p = ((noreturn_p || sibcall_p
2294 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2295 REGNO (addr)))
2296 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2298 /* Load the code address into scratch_b. */
2299 tmp = gen_rtx_POST_INC (Pmode, addr);
2300 tmp = gen_rtx_MEM (Pmode, tmp);
2301 emit_move_insn (scratch_r, tmp);
2302 emit_move_insn (scratch_b, scratch_r);
2304 /* Load the GP address. If ADDR is not dead here, then we must
2305 revert the change made above via the POST_INCREMENT. */
2306 if (!addr_dead_p)
2307 tmp = gen_rtx_POST_DEC (Pmode, addr);
2308 else
2309 tmp = addr;
2310 tmp = gen_rtx_MEM (Pmode, tmp);
2311 emit_move_insn (pic_offset_table_rtx, tmp);
2313 is_desc = true;
2314 addr = scratch_b;
2317 if (sibcall_p)
2318 insn = gen_sibcall_nogp (addr);
2319 else if (retval)
2320 insn = gen_call_value_nogp (retval, addr, retaddr);
2321 else
2322 insn = gen_call_nogp (addr, retaddr);
2323 emit_call_insn (insn);
2325 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2326 ia64_reload_gp ();
2329 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2331 This differs from the generic code in that we know about the zero-extending
2332 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2333 also know that ld.acq+cmpxchg.rel equals a full barrier.
2335 The loop we want to generate looks like
2337 cmp_reg = mem;
2338 label:
2339 old_reg = cmp_reg;
2340 new_reg = cmp_reg op val;
2341 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2342 if (cmp_reg != old_reg)
2343 goto label;
2345 Note that we only do the plain load from memory once. Subsequent
2346 iterations use the value loaded by the compare-and-swap pattern. */
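/* Illustrative expansion (not from the original sources): a DImode
   __sync_fetch_and_and roughly becomes

       cmp_reg = ld8.acq [mem]
     L: old_reg = cmp_reg
       ar.ccv = cmp_reg
       new_reg = cmp_reg & val
       cmp_reg = cmpxchg8.rel [mem], new_reg
       if (cmp_reg != old_reg) goto L

   while PLUS/MINUS with a suitable immediate is handled by the fetchadd
   special case below and never reaches this loop.  */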
2348 void
2349 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2350 rtx old_dst, rtx new_dst, enum memmodel model)
2352 machine_mode mode = GET_MODE (mem);
2353 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2354 enum insn_code icode;
2356 /* Special case for using fetchadd. */
2357 if ((mode == SImode || mode == DImode)
2358 && (code == PLUS || code == MINUS)
2359 && fetchadd_operand (val, mode))
2361 if (code == MINUS)
2362 val = GEN_INT (-INTVAL (val));
2364 if (!old_dst)
2365 old_dst = gen_reg_rtx (mode);
2367 switch (model)
2369 case MEMMODEL_ACQ_REL:
2370 case MEMMODEL_SEQ_CST:
2371 case MEMMODEL_SYNC_SEQ_CST:
2372 emit_insn (gen_memory_barrier ());
2373 /* FALLTHRU */
2374 case MEMMODEL_RELAXED:
2375 case MEMMODEL_ACQUIRE:
2376 case MEMMODEL_SYNC_ACQUIRE:
2377 case MEMMODEL_CONSUME:
2378 if (mode == SImode)
2379 icode = CODE_FOR_fetchadd_acq_si;
2380 else
2381 icode = CODE_FOR_fetchadd_acq_di;
2382 break;
2383 case MEMMODEL_RELEASE:
2384 case MEMMODEL_SYNC_RELEASE:
2385 if (mode == SImode)
2386 icode = CODE_FOR_fetchadd_rel_si;
2387 else
2388 icode = CODE_FOR_fetchadd_rel_di;
2389 break;
2391 default:
2392 gcc_unreachable ();
2395 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2397 if (new_dst)
2399 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2400 true, OPTAB_WIDEN);
2401 if (new_reg != new_dst)
2402 emit_move_insn (new_dst, new_reg);
2404 return;
2407 /* Because of the volatile mem read, we get an ld.acq, which is the
2408 front half of the full barrier. The end half is the cmpxchg.rel.
2409 For relaxed and release memory models we don't need this, but we
2410 don't bother trying to prevent it either. */
2411 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2412 || MEM_VOLATILE_P (mem));
2414 old_reg = gen_reg_rtx (DImode);
2415 cmp_reg = gen_reg_rtx (DImode);
2416 label = gen_label_rtx ();
2418 if (mode != DImode)
2420 val = simplify_gen_subreg (DImode, val, mode, 0);
2421 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2423 else
2424 emit_move_insn (cmp_reg, mem);
2426 emit_label (label);
2428 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2429 emit_move_insn (old_reg, cmp_reg);
2430 emit_move_insn (ar_ccv, cmp_reg);
2432 if (old_dst)
2433 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2435 new_reg = cmp_reg;
2436 if (code == NOT)
2438 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2439 true, OPTAB_DIRECT);
2440 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2442 else
2443 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2444 true, OPTAB_DIRECT);
2446 if (mode != DImode)
2447 new_reg = gen_lowpart (mode, new_reg);
2448 if (new_dst)
2449 emit_move_insn (new_dst, new_reg);
2451 switch (model)
2453 case MEMMODEL_RELAXED:
2454 case MEMMODEL_ACQUIRE:
2455 case MEMMODEL_SYNC_ACQUIRE:
2456 case MEMMODEL_CONSUME:
2457 switch (mode)
2459 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2460 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2461 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2462 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2463 default:
2464 gcc_unreachable ();
2466 break;
2468 case MEMMODEL_RELEASE:
2469 case MEMMODEL_SYNC_RELEASE:
2470 case MEMMODEL_ACQ_REL:
2471 case MEMMODEL_SEQ_CST:
2472 case MEMMODEL_SYNC_SEQ_CST:
2473 switch (mode)
2475 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2476 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2477 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2478 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2479 default:
2480 gcc_unreachable ();
2482 break;
2484 default:
2485 gcc_unreachable ();
2488 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2490 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2493 /* Begin the assembly file. */
2495 static void
2496 ia64_file_start (void)
2498 default_file_start ();
2499 emit_safe_across_calls ();
2502 void
2503 emit_safe_across_calls (void)
2505 unsigned int rs, re;
2506 int out_state;
2508 rs = 1;
2509 out_state = 0;
2510 while (1)
2512 while (rs < 64 && call_used_regs[PR_REG (rs)])
2513 rs++;
2514 if (rs >= 64)
2515 break;
2516 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2517 continue;
2518 if (out_state == 0)
2520 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2521 out_state = 1;
2523 else
2524 fputc (',', asm_out_file);
2525 if (re == rs + 1)
2526 fprintf (asm_out_file, "p%u", rs);
2527 else
2528 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2529 rs = re + 1;
2531 if (out_state)
2532 fputc ('\n', asm_out_file);
2535 /* Globalize a declaration. */
2537 static void
2538 ia64_globalize_decl_name (FILE * stream, tree decl)
2540 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2541 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2542 if (version_attr)
2544 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2545 const char *p = TREE_STRING_POINTER (v);
2546 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2548 targetm.asm_out.globalize_label (stream, name);
2549 if (TREE_CODE (decl) == FUNCTION_DECL)
2550 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2553 /* Helper function for ia64_compute_frame_size: find an appropriate general
2554 register to spill some special register to. GR0 to GR31 bits already
2555 allocated by this routine are recorded in current_frame_info.gr_used_mask.
2556 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2558 static int
2559 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2561 int regno;
2563 if (emitted_frame_related_regs[r] != 0)
2565 regno = emitted_frame_related_regs[r];
2566 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2567 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2568 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2569 else if (crtl->is_leaf
2570 && regno >= GR_REG (1) && regno <= GR_REG (31))
2571 current_frame_info.gr_used_mask |= 1 << regno;
2573 return regno;
2576 /* If this is a leaf function, first try an otherwise unused
2577 call-clobbered register. */
2578 if (crtl->is_leaf)
2580 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2581 if (! df_regs_ever_live_p (regno)
2582 && call_used_regs[regno]
2583 && ! fixed_regs[regno]
2584 && ! global_regs[regno]
2585 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2586 && ! is_emitted (regno))
2588 current_frame_info.gr_used_mask |= 1 << regno;
2589 return regno;
2593 if (try_locals)
2595 regno = current_frame_info.n_local_regs;
2596 /* If there is a frame pointer, then we can't use loc79, because
2597 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2598 reg_name switching code in ia64_expand_prologue. */
2599 while (regno < (80 - frame_pointer_needed))
2600 if (! is_emitted (LOC_REG (regno++)))
2602 current_frame_info.n_local_regs = regno;
2603 return LOC_REG (regno - 1);
2607 /* Failed to find a general register to spill to. Must use stack. */
2608 return 0;
2611 /* In order to make for nice schedules, we try to allocate every temporary
2612 to a different register. We must of course stay away from call-saved,
2613 fixed, and global registers. We must also stay away from registers
2614 allocated in current_frame_info.gr_used_mask, since those include regs
2615 used all through the prologue.
2617 Any register allocated here must be used immediately. The idea is to
2618 aid scheduling, not to solve data flow problems. */
2620 static int last_scratch_gr_reg;
2622 static int
2623 next_scratch_gr_reg (void)
2625 int i, regno;
2627 for (i = 0; i < 32; ++i)
2629 regno = (last_scratch_gr_reg + i + 1) & 31;
2630 if (call_used_regs[regno]
2631 && ! fixed_regs[regno]
2632 && ! global_regs[regno]
2633 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2635 last_scratch_gr_reg = regno;
2636 return regno;
2640 /* There must be _something_ available. */
2641 gcc_unreachable ();
2644 /* Helper function for ia64_compute_frame_size, called through
2645 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2647 static void
2648 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2650 unsigned int regno = REGNO (reg);
2651 if (regno < 32)
2653 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2654 for (i = 0; i < n; ++i)
2655 current_frame_info.gr_used_mask |= 1 << (regno + i);
2660 /* Compute the size and layout of the stack frame for the current function,
2661 recording the result in current_frame_info. SIZE is the number of bytes
2662 of space needed for local variables. */
2664 static void
2665 ia64_compute_frame_size (HOST_WIDE_INT size)
2667 HOST_WIDE_INT total_size;
2668 HOST_WIDE_INT spill_size = 0;
2669 HOST_WIDE_INT extra_spill_size = 0;
2670 HOST_WIDE_INT pretend_args_size;
2671 HARD_REG_SET mask;
2672 int n_spilled = 0;
2673 int spilled_gr_p = 0;
2674 int spilled_fr_p = 0;
2675 unsigned int regno;
2676 int min_regno;
2677 int max_regno;
2678 int i;
2680 if (current_frame_info.initialized)
2681 return;
2683 memset (&current_frame_info, 0, sizeof current_frame_info);
2684 CLEAR_HARD_REG_SET (mask);
2686 /* Don't allocate scratches to the return register. */
2687 diddle_return_value (mark_reg_gr_used_mask, NULL);
2689 /* Don't allocate scratches to the EH scratch registers. */
2690 if (cfun->machine->ia64_eh_epilogue_sp)
2691 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2692 if (cfun->machine->ia64_eh_epilogue_bsp)
2693 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2695 /* Static stack checking uses r2 and r3. */
2696 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2697 current_frame_info.gr_used_mask |= 0xc;
2699 /* Find the size of the register stack frame. We have only 80 local
2700 registers, because we reserve 8 for the inputs and 8 for the
2701 outputs. */
2703 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2704 since we'll be adjusting that down later. */
2705 regno = LOC_REG (78) + ! frame_pointer_needed;
2706 for (; regno >= LOC_REG (0); regno--)
2707 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2708 break;
2709 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2711 /* For functions marked with the syscall_linkage attribute, we must mark
2712 all eight input registers as in use, so that locals aren't visible to
2713 the caller. */
2715 if (cfun->machine->n_varargs > 0
2716 || lookup_attribute ("syscall_linkage",
2717 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2718 current_frame_info.n_input_regs = 8;
2719 else
2721 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2722 if (df_regs_ever_live_p (regno))
2723 break;
2724 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2727 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2728 if (df_regs_ever_live_p (regno))
2729 break;
2730 i = regno - OUT_REG (0) + 1;
2732 #ifndef PROFILE_HOOK
2733 /* When -p profiling, we need one output register for the mcount argument.
2734 Likewise for -a profiling for the bb_init_func argument. For -ax
2735 profiling, we need two output registers for the two bb_init_trace_func
2736 arguments. */
2737 if (crtl->profile)
2738 i = MAX (i, 1);
2739 #endif
2740 current_frame_info.n_output_regs = i;
2742 /* ??? No rotating register support yet. */
2743 current_frame_info.n_rotate_regs = 0;
2745 /* Discover which registers need spilling, and how much room that
2746 will take. Begin with floating point and general registers,
2747 which will always wind up on the stack. */
2749 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2750 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2752 SET_HARD_REG_BIT (mask, regno);
2753 spill_size += 16;
2754 n_spilled += 1;
2755 spilled_fr_p = 1;
2758 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2759 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2761 SET_HARD_REG_BIT (mask, regno);
2762 spill_size += 8;
2763 n_spilled += 1;
2764 spilled_gr_p = 1;
2767 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2768 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2770 SET_HARD_REG_BIT (mask, regno);
2771 spill_size += 8;
2772 n_spilled += 1;
2775 /* Now come all special registers that might get saved in other
2776 general registers. */
2778 if (frame_pointer_needed)
2780 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2781 /* If we did not get a register, then we take LOC79. This is guaranteed
2782 to be free, even if regs_ever_live is already set, because this is
2783 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2784 as we don't count loc79 above. */
2785 if (current_frame_info.r[reg_fp] == 0)
2787 current_frame_info.r[reg_fp] = LOC_REG (79);
2788 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2792 if (! crtl->is_leaf)
2794 /* Emit a save of BR0 if we call other functions. Do this even
2795 if this function doesn't return, as EH depends on this to be
2796 able to unwind the stack. */
2797 SET_HARD_REG_BIT (mask, BR_REG (0));
2799 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2800 if (current_frame_info.r[reg_save_b0] == 0)
2802 extra_spill_size += 8;
2803 n_spilled += 1;
2806 /* Similarly for ar.pfs. */
2807 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2808 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2809 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2811 extra_spill_size += 8;
2812 n_spilled += 1;
2815 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2816 registers are clobbered, so we fall back to the stack. */
2817 current_frame_info.r[reg_save_gp]
2818 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2819 if (current_frame_info.r[reg_save_gp] == 0)
2821 SET_HARD_REG_BIT (mask, GR_REG (1));
2822 spill_size += 8;
2823 n_spilled += 1;
2826 else
2828 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2830 SET_HARD_REG_BIT (mask, BR_REG (0));
2831 extra_spill_size += 8;
2832 n_spilled += 1;
2835 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2837 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2838 current_frame_info.r[reg_save_ar_pfs]
2839 = find_gr_spill (reg_save_ar_pfs, 1);
2840 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2842 extra_spill_size += 8;
2843 n_spilled += 1;
2848 /* Unwind descriptor hackery: things are most efficient if we allocate
2849 consecutive GR save registers for RP, PFS, FP in that order. However,
2850 it is absolutely critical that FP get the only hard register that's
2851 guaranteed to be free, so we allocate it first. If all three did
2852 happen to be allocated hard regs, and are consecutive, rearrange them
2853 into the preferred order now.
2855 If we have already emitted code for any of those registers,
2856 then it's already too late to change. */
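/* Illustrative example: if the allocator handed out r33 for FP, r34 for
   B0 and r35 for PFS, the block below reorders them to B0 = r33,
   PFS = r34 and FP = r35, matching the preferred RP, PFS, FP layout.  */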
2857 min_regno = MIN (current_frame_info.r[reg_fp],
2858 MIN (current_frame_info.r[reg_save_b0],
2859 current_frame_info.r[reg_save_ar_pfs]));
2860 max_regno = MAX (current_frame_info.r[reg_fp],
2861 MAX (current_frame_info.r[reg_save_b0],
2862 current_frame_info.r[reg_save_ar_pfs]));
2863 if (min_regno > 0
2864 && min_regno + 2 == max_regno
2865 && (current_frame_info.r[reg_fp] == min_regno + 1
2866 || current_frame_info.r[reg_save_b0] == min_regno + 1
2867 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2868 && (emitted_frame_related_regs[reg_save_b0] == 0
2869 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2870 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2871 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2872 && (emitted_frame_related_regs[reg_fp] == 0
2873 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2875 current_frame_info.r[reg_save_b0] = min_regno;
2876 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2877 current_frame_info.r[reg_fp] = min_regno + 2;
2880 /* See if we need to store the predicate register block. */
2881 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2882 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2883 break;
2884 if (regno <= PR_REG (63))
2886 SET_HARD_REG_BIT (mask, PR_REG (0));
2887 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2888 if (current_frame_info.r[reg_save_pr] == 0)
2890 extra_spill_size += 8;
2891 n_spilled += 1;
2894 /* ??? Mark them all as used so that register renaming and such
2895 are free to use them. */
2896 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2897 df_set_regs_ever_live (regno, true);
2900 /* If we're forced to use st8.spill, we're forced to save and restore
2901 ar.unat as well. The check for existing liveness allows inline asm
2902 to touch ar.unat. */
2903 if (spilled_gr_p || cfun->machine->n_varargs
2904 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2906 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2907 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2908 current_frame_info.r[reg_save_ar_unat]
2909 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2910 if (current_frame_info.r[reg_save_ar_unat] == 0)
2912 extra_spill_size += 8;
2913 n_spilled += 1;
2917 if (df_regs_ever_live_p (AR_LC_REGNUM))
2919 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2920 current_frame_info.r[reg_save_ar_lc]
2921 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2922 if (current_frame_info.r[reg_save_ar_lc] == 0)
2924 extra_spill_size += 8;
2925 n_spilled += 1;
2929 /* If we have an odd number of words of pretend arguments written to
2930 the stack, then the FR save area will be unaligned. We round the
2931 size of this area up to keep things 16 byte aligned. */
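/* For example (illustrative), 8 bytes of pretend arguments would leave
   the 16-byte FR spill slots misaligned, so the area is padded to 16.  */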
2932 if (spilled_fr_p)
2933 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2934 else
2935 pretend_args_size = crtl->args.pretend_args_size;
2937 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2938 + crtl->outgoing_args_size);
2939 total_size = IA64_STACK_ALIGN (total_size);
2941 /* We always use the 16-byte scratch area provided by the caller, but
2942 if we are a leaf function, there's no one to which we need to provide
2943 a scratch area. However, if the function allocates dynamic stack space,
2944 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2945 so we need to cope. */
2946 if (crtl->is_leaf && !cfun->calls_alloca)
2947 total_size = MAX (0, total_size - 16);
2949 current_frame_info.total_size = total_size;
2950 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2951 current_frame_info.spill_size = spill_size;
2952 current_frame_info.extra_spill_size = extra_spill_size;
2953 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2954 current_frame_info.n_spilled = n_spilled;
2955 current_frame_info.initialized = reload_completed;
2958 /* Worker function for TARGET_CAN_ELIMINATE. */
2960 bool
2961 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2963 return (to == BR_REG (0) ? crtl->is_leaf : true);
2966 /* Compute the initial difference between the specified pair of registers. */
2968 HOST_WIDE_INT
2969 ia64_initial_elimination_offset (int from, int to)
2971 HOST_WIDE_INT offset;
2973 ia64_compute_frame_size (get_frame_size ());
2974 switch (from)
2976 case FRAME_POINTER_REGNUM:
2977 switch (to)
2979 case HARD_FRAME_POINTER_REGNUM:
2980 offset = -current_frame_info.total_size;
2981 if (!crtl->is_leaf || cfun->calls_alloca)
2982 offset += 16 + crtl->outgoing_args_size;
2983 break;
2985 case STACK_POINTER_REGNUM:
2986 offset = 0;
2987 if (!crtl->is_leaf || cfun->calls_alloca)
2988 offset += 16 + crtl->outgoing_args_size;
2989 break;
2991 default:
2992 gcc_unreachable ();
2994 break;
2996 case ARG_POINTER_REGNUM:
2997 /* Arguments start above the 16 byte save area, unless stdarg
2998 in which case we store through the 16 byte save area. */
2999 switch (to)
3001 case HARD_FRAME_POINTER_REGNUM:
3002 offset = 16 - crtl->args.pretend_args_size;
3003 break;
3005 case STACK_POINTER_REGNUM:
3006 offset = (current_frame_info.total_size
3007 + 16 - crtl->args.pretend_args_size);
3008 break;
3010 default:
3011 gcc_unreachable ();
3013 break;
3015 default:
3016 gcc_unreachable ();
3019 return offset;
3022 /* If there are more than a trivial number of register spills, we use
3023 two interleaved iterators so that we can get two memory references
3024 per insn group.
3026 In order to simplify things in the prologue and epilogue expanders,
3027 we use helper functions to fix up the memory references after the
3028 fact with the appropriate offsets to a POST_MODIFY memory mode.
3029 The following data structure tracks the state of the two iterators
3030 while insns are being emitted. */
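/* Sketch of the effect (illustrative only, hypothetical registers): with
   two iterators the emitted spills alternate between two address
   registers, e.g.

       st8.spill [r16] = r4
       st8.spill [r17] = r5
       ;;
       st8.spill [r16] = r6
       st8.spill [r17] = r7

   so that each instruction group can contain two memory references.  */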
3032 struct spill_fill_data
3034 rtx_insn *init_after; /* point at which to emit initializations */
3035 rtx init_reg[2]; /* initial base register */
3036 rtx iter_reg[2]; /* the iterator registers */
3037 rtx *prev_addr[2]; /* address of last memory use */
3038 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3039 HOST_WIDE_INT prev_off[2]; /* last offset */
3040 int n_iter; /* number of iterators in use */
3041 int next_iter; /* next iterator to use */
3042 unsigned int save_gr_used_mask;
3045 static struct spill_fill_data spill_fill_data;
3047 static void
3048 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3050 int i;
3052 spill_fill_data.init_after = get_last_insn ();
3053 spill_fill_data.init_reg[0] = init_reg;
3054 spill_fill_data.init_reg[1] = init_reg;
3055 spill_fill_data.prev_addr[0] = NULL;
3056 spill_fill_data.prev_addr[1] = NULL;
3057 spill_fill_data.prev_insn[0] = NULL;
3058 spill_fill_data.prev_insn[1] = NULL;
3059 spill_fill_data.prev_off[0] = cfa_off;
3060 spill_fill_data.prev_off[1] = cfa_off;
3061 spill_fill_data.next_iter = 0;
3062 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3064 spill_fill_data.n_iter = 1 + (n_spills > 2);
3065 for (i = 0; i < spill_fill_data.n_iter; ++i)
3067 int regno = next_scratch_gr_reg ();
3068 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3069 current_frame_info.gr_used_mask |= 1 << regno;
3073 static void
3074 finish_spill_pointers (void)
3076 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3079 static rtx
3080 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3082 int iter = spill_fill_data.next_iter;
3083 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3084 rtx disp_rtx = GEN_INT (disp);
3085 rtx mem;
3087 if (spill_fill_data.prev_addr[iter])
3089 if (satisfies_constraint_N (disp_rtx))
3091 *spill_fill_data.prev_addr[iter]
3092 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3093 gen_rtx_PLUS (DImode,
3094 spill_fill_data.iter_reg[iter],
3095 disp_rtx));
3096 add_reg_note (spill_fill_data.prev_insn[iter],
3097 REG_INC, spill_fill_data.iter_reg[iter]);
3099 else
3101 /* ??? Could use register post_modify for loads. */
3102 if (!satisfies_constraint_I (disp_rtx))
3104 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3105 emit_move_insn (tmp, disp_rtx);
3106 disp_rtx = tmp;
3108 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3109 spill_fill_data.iter_reg[iter], disp_rtx));
3112 /* Micro-optimization: if we've created a frame pointer, it's at
3113 CFA 0, which may allow the real iterator to be initialized lower,
3114 slightly increasing parallelism. Also, if there are few saves
3115 it may eliminate the iterator entirely. */
3116 else if (disp == 0
3117 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3118 && frame_pointer_needed)
3120 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3121 set_mem_alias_set (mem, get_varargs_alias_set ());
3122 return mem;
3124 else
3126 rtx seq;
3127 rtx_insn *insn;
3129 if (disp == 0)
3130 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3131 spill_fill_data.init_reg[iter]);
3132 else
3134 start_sequence ();
3136 if (!satisfies_constraint_I (disp_rtx))
3138 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3139 emit_move_insn (tmp, disp_rtx);
3140 disp_rtx = tmp;
3143 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3144 spill_fill_data.init_reg[iter],
3145 disp_rtx));
3147 seq = get_insns ();
3148 end_sequence ();
3151 /* Careful for being the first insn in a sequence. */
3152 if (spill_fill_data.init_after)
3153 insn = emit_insn_after (seq, spill_fill_data.init_after);
3154 else
3156 rtx_insn *first = get_insns ();
3157 if (first)
3158 insn = emit_insn_before (seq, first);
3159 else
3160 insn = emit_insn (seq);
3162 spill_fill_data.init_after = insn;
3165 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3167 /* ??? Not all of the spills are for varargs, but some of them are.
3168 The rest of the spills belong in an alias set of their own. But
3169 it doesn't actually hurt to include them here. */
3170 set_mem_alias_set (mem, get_varargs_alias_set ());
3172 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3173 spill_fill_data.prev_off[iter] = cfa_off;
3175 if (++iter >= spill_fill_data.n_iter)
3176 iter = 0;
3177 spill_fill_data.next_iter = iter;
3179 return mem;
3182 static void
3183 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3184 rtx frame_reg)
3186 int iter = spill_fill_data.next_iter;
3187 rtx mem;
3188 rtx_insn *insn;
3190 mem = spill_restore_mem (reg, cfa_off);
3191 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3192 spill_fill_data.prev_insn[iter] = insn;
3194 if (frame_reg)
3196 rtx base;
3197 HOST_WIDE_INT off;
3199 RTX_FRAME_RELATED_P (insn) = 1;
3201 /* Don't even pretend that the unwind code can intuit its way
3202 through a pair of interleaved post_modify iterators. Just
3203 provide the correct answer. */
3205 if (frame_pointer_needed)
3207 base = hard_frame_pointer_rtx;
3208 off = - cfa_off;
3210 else
3212 base = stack_pointer_rtx;
3213 off = current_frame_info.total_size - cfa_off;
3216 add_reg_note (insn, REG_CFA_OFFSET,
3217 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3218 plus_constant (Pmode,
3219 base, off)),
3220 frame_reg));
3224 static void
3225 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3227 int iter = spill_fill_data.next_iter;
3228 rtx_insn *insn;
3230 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3231 GEN_INT (cfa_off)));
3232 spill_fill_data.prev_insn[iter] = insn;
3235 /* Wrapper functions that discard the CONST_INT spill offset. These
3236 exist so that we can give gr_spill/gr_fill the offset they need and
3237 use a consistent function interface. */
3239 static rtx
3240 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3242 return gen_movdi (dest, src);
3245 static rtx
3246 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3248 return gen_fr_spill (dest, src);
3251 static rtx
3252 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3254 return gen_fr_restore (dest, src);
3257 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3259 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3260 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
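/* Illustrative arithmetic: each stacked register occupies 8 bytes in the
   backing store, and one extra 8-byte NaT collection slot is interleaved
   for every 63 registers (plus one for a partial group), hence the
   (N)/63 + 1 term.  E.g. N == 96 gives (96 + 96/63 + 1) * 8 == 784 bytes.  */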
3262 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3263 inclusive. These are offsets from the current stack pointer. BS_SIZE
3264 is the size of the backing store. ??? This clobbers r2 and r3. */
3266 static void
3267 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3268 int bs_size)
3270 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3271 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3272 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3274 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3275 of the Register Stack Engine. We also need to probe it after checking
3276 that the 2 stacks don't overlap. */
3277 emit_insn (gen_bsp_value (r3));
3278 emit_move_insn (r2, GEN_INT (-(first + size)));
3280 /* Compare current value of BSP and SP registers. */
3281 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3282 r3, stack_pointer_rtx)));
3284 /* Compute the address of the probe for the Backing Store (which grows
3285 towards higher addresses). We probe only at the first offset of
3286 the next page because some OSes (e.g. Linux/ia64) only extend the
3287 backing store when this specific address is hit (but generate a SEGV
3288 on other addresses). Page size is the worst case (4KB). The reserve
3289 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3290 Also compute the address of the last probe for the memory stack
3291 (which grows towards lower addresses). */
3292 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3293 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3295 /* Compare them and raise SEGV if the former has topped the latter. */
3296 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3297 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3298 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3299 r3, r2))));
3300 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3301 const0_rtx),
3302 const0_rtx));
3303 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3304 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3305 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3306 GEN_INT (11))));
3308 /* Probe the Backing Store if necessary. */
3309 if (bs_size > 0)
3310 emit_stack_probe (r3);
3312 /* Probe the memory stack if necessary. */
3313 if (size == 0)
3316 /* See if we have a constant small number of probes to generate. If so,
3317 that's the easy case. */
3318 else if (size <= PROBE_INTERVAL)
3319 emit_stack_probe (r2);
3321 /* The run-time loop is made up of 8 insns in the generic case while this
3322 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
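/* E.g. (illustrative) four intervals cost 5 + 2*2 = 9 unrolled insns,
   roughly on par with the 8-insn run-time loop, which is why the
   4 * PROBE_INTERVAL cutoff below is where the loop takes over.  */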
3323 else if (size <= 4 * PROBE_INTERVAL)
3325 HOST_WIDE_INT i;
3327 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3328 emit_insn (gen_rtx_SET (r2,
3329 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3330 emit_stack_probe (r2);
3332 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3333 it exceeds SIZE. If only two probes are needed, this will not
3334 generate any code. Then probe at FIRST + SIZE. */
3335 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3337 emit_insn (gen_rtx_SET (r2,
3338 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3339 emit_stack_probe (r2);
3342 emit_insn (gen_rtx_SET (r2,
3343 plus_constant (Pmode, r2,
3344 (i - PROBE_INTERVAL) - size)));
3345 emit_stack_probe (r2);
3348 /* Otherwise, do the same as above, but in a loop. Note that we must be
3349 extra careful with variables wrapping around because we might be at
3350 the very top (or the very bottom) of the address space and we have
3351 to be able to handle this case properly; in particular, we use an
3352 equality test for the loop condition. */
3353 else
3355 HOST_WIDE_INT rounded_size;
3357 emit_move_insn (r2, GEN_INT (-first));
3360 /* Step 1: round SIZE to the previous multiple of the interval. */
3362 rounded_size = size & -PROBE_INTERVAL;
3365 /* Step 2: compute initial and final value of the loop counter. */
3367 /* TEST_ADDR = SP + FIRST. */
3368 emit_insn (gen_rtx_SET (r2,
3369 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3371 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3372 if (rounded_size > (1 << 21))
3374 emit_move_insn (r3, GEN_INT (-rounded_size));
3375 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3377 else
3378 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3379 GEN_INT (-rounded_size))));
3382 /* Step 3: the loop
3384 while (TEST_ADDR != LAST_ADDR)
3386 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3387 probe at TEST_ADDR
3390 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3391 until it is equal to ROUNDED_SIZE. */
3393 emit_insn (gen_probe_stack_range (r2, r2, r3));
3396 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3397 that SIZE is equal to ROUNDED_SIZE. */
3399 /* TEMP = SIZE - ROUNDED_SIZE. */
3400 if (size != rounded_size)
3402 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3403 rounded_size - size)));
3404 emit_stack_probe (r2);
3408 /* Make sure nothing is scheduled before we are done. */
3409 emit_insn (gen_blockage ());
3412 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3413 absolute addresses. */
3415 const char *
3416 output_probe_stack_range (rtx reg1, rtx reg2)
3418 static int labelno = 0;
3419 char loop_lab[32], end_lab[32];
3420 rtx xops[3];
3422 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3423 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3425 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3427 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
3428 xops[0] = reg1;
3429 xops[1] = reg2;
3430 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3431 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3432 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3433 assemble_name_raw (asm_out_file, end_lab);
3434 fputc ('\n', asm_out_file);
3436 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3437 xops[1] = GEN_INT (-PROBE_INTERVAL);
3438 output_asm_insn ("addl %0 = %1, %0", xops);
3439 fputs ("\t;;\n", asm_out_file);
3441 /* Probe at TEST_ADDR and branch. */
3442 output_asm_insn ("probe.w.fault %0, 0", xops);
3443 fprintf (asm_out_file, "\tbr ");
3444 assemble_name_raw (asm_out_file, loop_lab);
3445 fputc ('\n', asm_out_file);
3447 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3449 return "";
3452 /* Called after register allocation to add any instructions needed for the
3453 prologue. Using a prologue insn is preferred over putting all of the
3454 instructions in output_function_prologue (), since it allows the scheduler
3455 to intermix instructions with the saves of the caller-saved registers. In
3456 some cases, it might be necessary to emit a barrier instruction as the last
3457 insn to prevent such scheduling.
3459 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3460 so that the debug info generation code can handle them properly.
3462 The register save area is laid out like so:
3463 cfa+16
3464 [ varargs spill area ]
3465 [ fr register spill area ]
3466 [ br register spill area ]
3467 [ ar register spill area ]
3468 [ pr register spill area ]
3469 [ gr register spill area ] */
3471 /* ??? Get inefficient code when the frame size is larger than can fit in an
3472 adds instruction. */
3474 void
3475 ia64_expand_prologue (void)
3477 rtx_insn *insn;
3478 rtx ar_pfs_save_reg, ar_unat_save_reg;
3479 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3480 rtx reg, alt_reg;
3482 ia64_compute_frame_size (get_frame_size ());
3483 last_scratch_gr_reg = 15;
3485 if (flag_stack_usage_info)
3486 current_function_static_stack_size = current_frame_info.total_size;
3488 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3490 HOST_WIDE_INT size = current_frame_info.total_size;
3491 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3492 + current_frame_info.n_local_regs);
3494 if (crtl->is_leaf && !cfun->calls_alloca)
3496 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3497 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3498 size - STACK_CHECK_PROTECT,
3499 bs_size);
3500 else if (size + bs_size > STACK_CHECK_PROTECT)
3501 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3503 else if (size + bs_size > 0)
3504 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3507 if (dump_file)
3509 fprintf (dump_file, "ia64 frame related registers "
3510 "recorded in current_frame_info.r[]:\n");
3511 #define PRINTREG(a) if (current_frame_info.r[a]) \
3512 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3513 PRINTREG(reg_fp);
3514 PRINTREG(reg_save_b0);
3515 PRINTREG(reg_save_pr);
3516 PRINTREG(reg_save_ar_pfs);
3517 PRINTREG(reg_save_ar_unat);
3518 PRINTREG(reg_save_ar_lc);
3519 PRINTREG(reg_save_gp);
3520 #undef PRINTREG
3523 /* If there is no epilogue, then we don't need some prologue insns.
3524 We need to avoid emitting the dead prologue insns, because flow
3525 will complain about them. */
3526 if (optimize)
3528 edge e;
3529 edge_iterator ei;
3531 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3532 if ((e->flags & EDGE_FAKE) == 0
3533 && (e->flags & EDGE_FALLTHRU) != 0)
3534 break;
3535 epilogue_p = (e != NULL);
3537 else
3538 epilogue_p = 1;
3540 /* Set the local, input, and output register names. We need to do this
3541 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3542 half. If we use in/loc/out register names, then we get assembler errors
3543 in crtn.S because there is no alloc insn or regstk directive in there. */
3544 if (! TARGET_REG_NAMES)
3546 int inputs = current_frame_info.n_input_regs;
3547 int locals = current_frame_info.n_local_regs;
3548 int outputs = current_frame_info.n_output_regs;
3550 for (i = 0; i < inputs; i++)
3551 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3552 for (i = 0; i < locals; i++)
3553 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3554 for (i = 0; i < outputs; i++)
3555 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3558 /* Set the frame pointer register name. The regnum is logically loc79,
3559 but of course we'll not have allocated that many locals. Rather than
3560 worrying about renumbering the existing rtxs, we adjust the name. */
3561 /* ??? This code means that we can never use one local register when
3562 there is a frame pointer. loc79 gets wasted in this case, as it is
3563 renamed to a register that will never be used. See also the try_locals
3564 code in find_gr_spill. */
3565 if (current_frame_info.r[reg_fp])
3567 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3568 reg_names[HARD_FRAME_POINTER_REGNUM]
3569 = reg_names[current_frame_info.r[reg_fp]];
3570 reg_names[current_frame_info.r[reg_fp]] = tmp;
3573 /* We don't need an alloc instruction if we've used no outputs or locals. */
3574 if (current_frame_info.n_local_regs == 0
3575 && current_frame_info.n_output_regs == 0
3576 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3577 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3579 /* If there is no alloc, but there are input registers used, then we
3580 need a .regstk directive. */
3581 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3582 ar_pfs_save_reg = NULL_RTX;
3584 else
3586 current_frame_info.need_regstk = 0;
3588 if (current_frame_info.r[reg_save_ar_pfs])
3590 regno = current_frame_info.r[reg_save_ar_pfs];
3591 reg_emitted (reg_save_ar_pfs);
3593 else
3594 regno = next_scratch_gr_reg ();
3595 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3597 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3598 GEN_INT (current_frame_info.n_input_regs),
3599 GEN_INT (current_frame_info.n_local_regs),
3600 GEN_INT (current_frame_info.n_output_regs),
3601 GEN_INT (current_frame_info.n_rotate_regs)));
3602 if (current_frame_info.r[reg_save_ar_pfs])
3604 RTX_FRAME_RELATED_P (insn) = 1;
3605 add_reg_note (insn, REG_CFA_REGISTER,
3606 gen_rtx_SET (ar_pfs_save_reg,
3607 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3611 /* Set up frame pointer, stack pointer, and spill iterators. */
3613 n_varargs = cfun->machine->n_varargs;
3614 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3615 stack_pointer_rtx, 0);
3617 if (frame_pointer_needed)
3619 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3620 RTX_FRAME_RELATED_P (insn) = 1;
3622 /* Force the unwind info to recognize this as defining a new CFA,
3623 rather than some temp register setup. */
3624 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3627 if (current_frame_info.total_size != 0)
3629 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3630 rtx offset;
3632 if (satisfies_constraint_I (frame_size_rtx))
3633 offset = frame_size_rtx;
3634 else
3636 regno = next_scratch_gr_reg ();
3637 offset = gen_rtx_REG (DImode, regno);
3638 emit_move_insn (offset, frame_size_rtx);
3641 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3642 stack_pointer_rtx, offset));
3644 if (! frame_pointer_needed)
3646 RTX_FRAME_RELATED_P (insn) = 1;
3647 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3648 gen_rtx_SET (stack_pointer_rtx,
3649 gen_rtx_PLUS (DImode,
3650 stack_pointer_rtx,
3651 frame_size_rtx)));
3654 /* ??? At this point we must generate a magic insn that appears to
3655 modify the stack pointer, the frame pointer, and all spill
3656 iterators. This would allow the most scheduling freedom. For
3657 now, just hard stop. */
3658 emit_insn (gen_blockage ());
3661 /* Must copy out ar.unat before doing any integer spills. */
3662 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3664 if (current_frame_info.r[reg_save_ar_unat])
3666 ar_unat_save_reg
3667 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3668 reg_emitted (reg_save_ar_unat);
3670 else
3672 alt_regno = next_scratch_gr_reg ();
3673 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3674 current_frame_info.gr_used_mask |= 1 << alt_regno;
3677 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3678 insn = emit_move_insn (ar_unat_save_reg, reg);
3679 if (current_frame_info.r[reg_save_ar_unat])
3681 RTX_FRAME_RELATED_P (insn) = 1;
3682 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3685 /* Even if we're not going to generate an epilogue, we still
3686 need to save the register so that EH works. */
3687 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3688 emit_insn (gen_prologue_use (ar_unat_save_reg));
3690 else
3691 ar_unat_save_reg = NULL_RTX;
3693 /* Spill all varargs registers. Do this before spilling any GR registers,
3694 since we want the UNAT bits for the GR registers to override the UNAT
3695 bits from varargs, which we don't care about. */
3697 cfa_off = -16;
3698 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3700 reg = gen_rtx_REG (DImode, regno);
3701 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3704 /* Locate the bottom of the register save area. */
3705 cfa_off = (current_frame_info.spill_cfa_off
3706 + current_frame_info.spill_size
3707 + current_frame_info.extra_spill_size);
3709 /* Save the predicate register block either in a register or in memory. */
3710 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3712 reg = gen_rtx_REG (DImode, PR_REG (0));
3713 if (current_frame_info.r[reg_save_pr] != 0)
3715 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3716 reg_emitted (reg_save_pr);
3717 insn = emit_move_insn (alt_reg, reg);
3719 /* ??? Denote pr spill/fill by a DImode move that modifies all
3720 64 hard registers. */
3721 RTX_FRAME_RELATED_P (insn) = 1;
3722 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3724 /* Even if we're not going to generate an epilogue, we still
3725 need to save the register so that EH works. */
3726 if (! epilogue_p)
3727 emit_insn (gen_prologue_use (alt_reg));
3729 else
3731 alt_regno = next_scratch_gr_reg ();
3732 alt_reg = gen_rtx_REG (DImode, alt_regno);
3733 insn = emit_move_insn (alt_reg, reg);
3734 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3735 cfa_off -= 8;
3739 /* Handle AR regs in numerical order. All of them get special handling. */
3740 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3741 && current_frame_info.r[reg_save_ar_unat] == 0)
3743 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3744 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3745 cfa_off -= 8;
3748 /* The alloc insn already copied ar.pfs into a general register. The
3749 only thing we have to do now is copy that register to a stack slot
3750 if we'd not allocated a local register for the job. */
3751 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3752 && current_frame_info.r[reg_save_ar_pfs] == 0)
3754 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3755 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3756 cfa_off -= 8;
3759 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3761 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3762 if (current_frame_info.r[reg_save_ar_lc] != 0)
3764 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3765 reg_emitted (reg_save_ar_lc);
3766 insn = emit_move_insn (alt_reg, reg);
3767 RTX_FRAME_RELATED_P (insn) = 1;
3768 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3770 /* Even if we're not going to generate an epilogue, we still
3771 need to save the register so that EH works. */
3772 if (! epilogue_p)
3773 emit_insn (gen_prologue_use (alt_reg));
3775 else
3777 alt_regno = next_scratch_gr_reg ();
3778 alt_reg = gen_rtx_REG (DImode, alt_regno);
3779 emit_move_insn (alt_reg, reg);
3780 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3781 cfa_off -= 8;
3785 /* Save the return pointer. */
3786 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3788 reg = gen_rtx_REG (DImode, BR_REG (0));
3789 if (current_frame_info.r[reg_save_b0] != 0)
3791 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3792 reg_emitted (reg_save_b0);
3793 insn = emit_move_insn (alt_reg, reg);
3794 RTX_FRAME_RELATED_P (insn) = 1;
3795 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3797 /* Even if we're not going to generate an epilogue, we still
3798 need to save the register so that EH works. */
3799 if (! epilogue_p)
3800 emit_insn (gen_prologue_use (alt_reg));
3802 else
3804 alt_regno = next_scratch_gr_reg ();
3805 alt_reg = gen_rtx_REG (DImode, alt_regno);
3806 emit_move_insn (alt_reg, reg);
3807 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3808 cfa_off -= 8;
3812 if (current_frame_info.r[reg_save_gp])
3814 reg_emitted (reg_save_gp);
3815 insn = emit_move_insn (gen_rtx_REG (DImode,
3816 current_frame_info.r[reg_save_gp]),
3817 pic_offset_table_rtx);
3820 /* We should now be at the base of the gr/br/fr spill area. */
3821 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3822 + current_frame_info.spill_size));
3824 /* Spill all general registers. */
3825 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3826 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3828 reg = gen_rtx_REG (DImode, regno);
3829 do_spill (gen_gr_spill, reg, cfa_off, reg);
3830 cfa_off -= 8;
3833 /* Spill the rest of the BR registers. */
3834 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3835 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3837 alt_regno = next_scratch_gr_reg ();
3838 alt_reg = gen_rtx_REG (DImode, alt_regno);
3839 reg = gen_rtx_REG (DImode, regno);
3840 emit_move_insn (alt_reg, reg);
3841 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3842 cfa_off -= 8;
3845 /* Align the frame and spill all FR registers. */
3846 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3847 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3849 gcc_assert (!(cfa_off & 15));
3850 reg = gen_rtx_REG (XFmode, regno);
3851 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3852 cfa_off -= 16;
3855 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3857 finish_spill_pointers ();
3860 /* Output the textual info surrounding the prologue. */
3862 void
3863 ia64_start_function (FILE *file, const char *fnname,
3864 tree decl ATTRIBUTE_UNUSED)
3866 #if TARGET_ABI_OPEN_VMS
3867 vms_start_function (fnname);
3868 #endif
3870 fputs ("\t.proc ", file);
3871 assemble_name (file, fnname);
3872 fputc ('\n', file);
3873 ASM_OUTPUT_LABEL (file, fnname);
3876 /* Called after register allocation to add any instructions needed for the
3877 epilogue. Using an epilogue insn is favored compared to putting all of the
3878 instructions in output_function_epilogue(), since it allows the scheduler
3879 to intermix instructions with the restores of the caller saved registers. In
3880 some cases, it might be necessary to emit a barrier instruction as the last
3881 insn to prevent such scheduling. */
3883 void
3884 ia64_expand_epilogue (int sibcall_p)
3886 rtx_insn *insn;
3887 rtx reg, alt_reg, ar_unat_save_reg;
3888 int regno, alt_regno, cfa_off;
3890 ia64_compute_frame_size (get_frame_size ());
3892 /* If there is a frame pointer, then we use it instead of the stack
3893 pointer, so that the stack pointer does not need to be valid when
3894 the epilogue starts. See EXIT_IGNORE_STACK. */
3895 if (frame_pointer_needed)
3896 setup_spill_pointers (current_frame_info.n_spilled,
3897 hard_frame_pointer_rtx, 0);
3898 else
3899 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3900 current_frame_info.total_size);
3902 if (current_frame_info.total_size != 0)
3904 /* ??? At this point we must generate a magic insn that appears to
3905 modify the spill iterators and the frame pointer. This would
3906 allow the most scheduling freedom. For now, just hard stop. */
3907 emit_insn (gen_blockage ());
3910 /* Locate the bottom of the register save area. */
3911 cfa_off = (current_frame_info.spill_cfa_off
3912 + current_frame_info.spill_size
3913 + current_frame_info.extra_spill_size);
3915 /* Restore the predicate registers. */
3916 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3918 if (current_frame_info.r[reg_save_pr] != 0)
3920 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3921 reg_emitted (reg_save_pr);
3923 else
3925 alt_regno = next_scratch_gr_reg ();
3926 alt_reg = gen_rtx_REG (DImode, alt_regno);
3927 do_restore (gen_movdi_x, alt_reg, cfa_off);
3928 cfa_off -= 8;
3930 reg = gen_rtx_REG (DImode, PR_REG (0));
3931 emit_move_insn (reg, alt_reg);
3934 /* Restore the application registers. */
3936 /* Load the saved unat from the stack, but do not restore it until
3937 after the GRs have been restored. */
3938 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3940 if (current_frame_info.r[reg_save_ar_unat] != 0)
3942 ar_unat_save_reg
3943 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3944 reg_emitted (reg_save_ar_unat);
3946 else
3948 alt_regno = next_scratch_gr_reg ();
3949 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3950 current_frame_info.gr_used_mask |= 1 << alt_regno;
3951 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3952 cfa_off -= 8;
3955 else
3956 ar_unat_save_reg = NULL_RTX;
3958 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3960 reg_emitted (reg_save_ar_pfs);
3961 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3962 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3963 emit_move_insn (reg, alt_reg);
3965 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3967 alt_regno = next_scratch_gr_reg ();
3968 alt_reg = gen_rtx_REG (DImode, alt_regno);
3969 do_restore (gen_movdi_x, alt_reg, cfa_off);
3970 cfa_off -= 8;
3971 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3972 emit_move_insn (reg, alt_reg);
3975 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3977 if (current_frame_info.r[reg_save_ar_lc] != 0)
3979 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3980 reg_emitted (reg_save_ar_lc);
3982 else
3984 alt_regno = next_scratch_gr_reg ();
3985 alt_reg = gen_rtx_REG (DImode, alt_regno);
3986 do_restore (gen_movdi_x, alt_reg, cfa_off);
3987 cfa_off -= 8;
3989 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3990 emit_move_insn (reg, alt_reg);
3993 /* Restore the return pointer. */
3994 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3996 if (current_frame_info.r[reg_save_b0] != 0)
3998 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3999 reg_emitted (reg_save_b0);
4001 else
4003 alt_regno = next_scratch_gr_reg ();
4004 alt_reg = gen_rtx_REG (DImode, alt_regno);
4005 do_restore (gen_movdi_x, alt_reg, cfa_off);
4006 cfa_off -= 8;
4008 reg = gen_rtx_REG (DImode, BR_REG (0));
4009 emit_move_insn (reg, alt_reg);
4012 /* We should now be at the base of the gr/br/fr spill area. */
4013 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4014 + current_frame_info.spill_size));
4016 /* The GP may be stored on the stack in the prologue, but it's
4017 never restored in the epilogue. Skip the stack slot. */
4018 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4019 cfa_off -= 8;
4021 /* Restore all general registers. */
4022 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4023 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4025 reg = gen_rtx_REG (DImode, regno);
4026 do_restore (gen_gr_restore, reg, cfa_off);
4027 cfa_off -= 8;
4030 /* Restore the branch registers. */
4031 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4032 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4034 alt_regno = next_scratch_gr_reg ();
4035 alt_reg = gen_rtx_REG (DImode, alt_regno);
4036 do_restore (gen_movdi_x, alt_reg, cfa_off);
4037 cfa_off -= 8;
4038 reg = gen_rtx_REG (DImode, regno);
4039 emit_move_insn (reg, alt_reg);
4042 /* Restore floating point registers. */
4043 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4044 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4046 gcc_assert (!(cfa_off & 15));
4047 reg = gen_rtx_REG (XFmode, regno);
4048 do_restore (gen_fr_restore_x, reg, cfa_off);
4049 cfa_off -= 16;
4052 /* Restore ar.unat for real. */
4053 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4055 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4056 emit_move_insn (reg, ar_unat_save_reg);
4059 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4061 finish_spill_pointers ();
4063 if (current_frame_info.total_size
4064 || cfun->machine->ia64_eh_epilogue_sp
4065 || frame_pointer_needed)
4067 /* ??? At this point we must generate a magic insn that appears to
4068 modify the spill iterators, the stack pointer, and the frame
4069 pointer. This would allow the most scheduling freedom. For now,
4070 just hard stop. */
4071 emit_insn (gen_blockage ());
4074 if (cfun->machine->ia64_eh_epilogue_sp)
4075 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4076 else if (frame_pointer_needed)
4078 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4079 RTX_FRAME_RELATED_P (insn) = 1;
4080 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4082 else if (current_frame_info.total_size)
4084 rtx offset, frame_size_rtx;
4086 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4087 if (satisfies_constraint_I (frame_size_rtx))
4088 offset = frame_size_rtx;
4089 else
4091 regno = next_scratch_gr_reg ();
4092 offset = gen_rtx_REG (DImode, regno);
4093 emit_move_insn (offset, frame_size_rtx);
4096 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4097 offset));
4099 RTX_FRAME_RELATED_P (insn) = 1;
4100 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4101 gen_rtx_SET (stack_pointer_rtx,
4102 gen_rtx_PLUS (DImode,
4103 stack_pointer_rtx,
4104 frame_size_rtx)));
4107 if (cfun->machine->ia64_eh_epilogue_bsp)
4108 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4110 if (! sibcall_p)
4111 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4112 else
4114 int fp = GR_REG (2);
4115 /* We need a throw away register here, r0 and r1 are reserved,
4116 so r2 is the first available call clobbered register. If
4117 there was a frame_pointer register, we may have swapped the
4118 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4119 sure we're using the string "r2" when emitting the register
4120 name for the assembler. */
4121 if (current_frame_info.r[reg_fp]
4122 && current_frame_info.r[reg_fp] == GR_REG (2))
4123 fp = HARD_FRAME_POINTER_REGNUM;
4125 /* We must emit an alloc to force the input registers to become output
4126 registers. Otherwise, if the callee tries to pass its parameters
4127 through to another call without an intervening alloc, then these
4128 values get lost. */
4129 /* ??? We don't need to preserve all input registers. We only need to
4130 preserve those input registers used as arguments to the sibling call.
4131 It is unclear how to compute that number here. */
4132 if (current_frame_info.n_input_regs != 0)
4134 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4136 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4137 const0_rtx, const0_rtx,
4138 n_inputs, const0_rtx));
4139 RTX_FRAME_RELATED_P (insn) = 1;
4141 /* ??? We need to mark the alloc as frame-related so that it gets
4142 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4143 But there's nothing dwarf2 related to be done wrt the register
4144 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4145 the empty parallel means dwarf2out will not see anything. */
4146 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4147 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4152 /* Return 1 if br.ret can do all the work required to return from a
4153 function. */
4155 int
4156 ia64_direct_return (void)
4158 if (reload_completed && ! frame_pointer_needed)
4160 ia64_compute_frame_size (get_frame_size ());
4162 return (current_frame_info.total_size == 0
4163 && current_frame_info.n_spilled == 0
4164 && current_frame_info.r[reg_save_b0] == 0
4165 && current_frame_info.r[reg_save_pr] == 0
4166 && current_frame_info.r[reg_save_ar_pfs] == 0
4167 && current_frame_info.r[reg_save_ar_unat] == 0
4168 && current_frame_info.r[reg_save_ar_lc] == 0);
4170 return 0;
4173 /* Return the magic cookie that we use to hold the return address
4174 during early compilation. */
4176 rtx
4177 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4179 if (count != 0)
4180 return NULL;
4181 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4184 /* Split this value after reload, now that we know where the return
4185 address is saved. */
4187 void
4188 ia64_split_return_addr_rtx (rtx dest)
4190 rtx src;
4192 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4194 if (current_frame_info.r[reg_save_b0] != 0)
4196 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4197 reg_emitted (reg_save_b0);
4199 else
4201 HOST_WIDE_INT off;
4202 unsigned int regno;
4203 rtx off_r;
4205 /* Compute offset from CFA for BR0. */
4206 /* ??? Must be kept in sync with ia64_expand_prologue. */
4207 off = (current_frame_info.spill_cfa_off
4208 + current_frame_info.spill_size);
4209 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4210 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4211 off -= 8;
4213 /* Convert CFA offset to a register based offset. */
4214 if (frame_pointer_needed)
4215 src = hard_frame_pointer_rtx;
4216 else
4218 src = stack_pointer_rtx;
4219 off += current_frame_info.total_size;
4222 /* Load address into scratch register. */
4223 off_r = GEN_INT (off);
4224 if (satisfies_constraint_I (off_r))
4225 emit_insn (gen_adddi3 (dest, src, off_r));
4226 else
4228 emit_move_insn (dest, off_r);
4229 emit_insn (gen_adddi3 (dest, src, dest));
4232 src = gen_rtx_MEM (Pmode, dest);
4235 else
4236 src = gen_rtx_REG (DImode, BR_REG (0));
4238 emit_move_insn (dest, src);
4241 int
4242 ia64_hard_regno_rename_ok (int from, int to)
4244 /* Don't clobber any of the registers we reserved for the prologue. */
4245 unsigned int r;
4247 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4248 if (to == current_frame_info.r[r]
4249 || from == current_frame_info.r[r]
4250 || to == emitted_frame_related_regs[r]
4251 || from == emitted_frame_related_regs[r])
4252 return 0;
4254 /* Don't use output registers outside the register frame. */
4255 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4256 return 0;
4258 /* Retain even/oddness on predicate register pairs. */
4259 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4260 return (from & 1) == (to & 1);
4262 return 1;
4265 /* Target hook for assembling integer objects. Handle word-sized
4266 aligned objects and detect the cases when @fptr is needed. */
4268 static bool
4269 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4271 if (size == POINTER_SIZE / BITS_PER_UNIT
4272 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4273 && GET_CODE (x) == SYMBOL_REF
4274 && SYMBOL_REF_FUNCTION_P (x))
4276 static const char * const directive[2][2] = {
4277 /* 64-bit pointer */ /* 32-bit pointer */
4278 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4279 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4281 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4282 output_addr_const (asm_out_file, x);
4283 fputs (")\n", asm_out_file);
4284 return true;
4286 return default_assemble_integer (x, size, aligned_p);
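As a quick illustration of the directives selected above (the symbol name is hypothetical): a word-sized, aligned function pointer in static data such as

      extern void my_func (void);
      void (*const fp) (void) = my_func;

is emitted through this hook, in the 64-bit aligned non-no-pic case, as

      data8 @fptr(my_func)

so the object file records the address of my_func's function descriptor rather than its raw code address.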
4289 /* Emit the function prologue. */
4291 static void
4292 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4294 int mask, grsave, grsave_prev;
4296 if (current_frame_info.need_regstk)
4297 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4298 current_frame_info.n_input_regs,
4299 current_frame_info.n_local_regs,
4300 current_frame_info.n_output_regs,
4301 current_frame_info.n_rotate_regs);
4303 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4304 return;
4306 /* Emit the .prologue directive. */
4308 mask = 0;
4309 grsave = grsave_prev = 0;
4310 if (current_frame_info.r[reg_save_b0] != 0)
4312 mask |= 8;
4313 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4315 if (current_frame_info.r[reg_save_ar_pfs] != 0
4316 && (grsave_prev == 0
4317 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4319 mask |= 4;
4320 if (grsave_prev == 0)
4321 grsave = current_frame_info.r[reg_save_ar_pfs];
4322 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4324 if (current_frame_info.r[reg_fp] != 0
4325 && (grsave_prev == 0
4326 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4328 mask |= 2;
4329 if (grsave_prev == 0)
4330 grsave = HARD_FRAME_POINTER_REGNUM;
4331 grsave_prev = current_frame_info.r[reg_fp];
4333 if (current_frame_info.r[reg_save_pr] != 0
4334 && (grsave_prev == 0
4335 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4337 mask |= 1;
4338 if (grsave_prev == 0)
4339 grsave = current_frame_info.r[reg_save_pr];
4342 if (mask && TARGET_GNU_AS)
4343 fprintf (file, "\t.prologue %d, %d\n", mask,
4344 ia64_dbx_register_number (grsave));
4345 else
4346 fputs ("\t.prologue\n", file);
4348 /* Emit a .spill directive, if necessary, to relocate the base of
4349 the register spill area. */
4350 if (current_frame_info.spill_cfa_off != -16)
4351 fprintf (file, "\t.spill %ld\n",
4352 (long) (current_frame_info.spill_cfa_off
4353 + current_frame_info.spill_size));
4356 /* Emit the .body directive at the scheduled end of the prologue. */
4358 static void
4359 ia64_output_function_end_prologue (FILE *file)
4361 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4362 return;
4364 fputs ("\t.body\n", file);
4367 /* Emit the function epilogue. */
4369 static void
4370 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4371 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4373 int i;
4375 if (current_frame_info.r[reg_fp])
4377 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4378 reg_names[HARD_FRAME_POINTER_REGNUM]
4379 = reg_names[current_frame_info.r[reg_fp]];
4380 reg_names[current_frame_info.r[reg_fp]] = tmp;
4381 reg_emitted (reg_fp);
4383 if (! TARGET_REG_NAMES)
4385 for (i = 0; i < current_frame_info.n_input_regs; i++)
4386 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4387 for (i = 0; i < current_frame_info.n_local_regs; i++)
4388 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4389 for (i = 0; i < current_frame_info.n_output_regs; i++)
4390 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4393 current_frame_info.initialized = 0;
4396 int
4397 ia64_dbx_register_number (int regno)
4399 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4400 from its home at loc79 to something inside the register frame. We
4401 must perform the same renumbering here for the debug info. */
4402 if (current_frame_info.r[reg_fp])
4404 if (regno == HARD_FRAME_POINTER_REGNUM)
4405 regno = current_frame_info.r[reg_fp];
4406 else if (regno == current_frame_info.r[reg_fp])
4407 regno = HARD_FRAME_POINTER_REGNUM;
4410 if (IN_REGNO_P (regno))
4411 return 32 + regno - IN_REG (0);
4412 else if (LOC_REGNO_P (regno))
4413 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4414 else if (OUT_REGNO_P (regno))
4415 return (32 + current_frame_info.n_input_regs
4416 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4417 else
4418 return regno;
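A minimal standalone sketch of the renumbering above (not GCC code; the function name and parameters are invented for illustration):

      /* kind: 0 = input, 1 = local, 2 = output register; idx counts within
         that group.  Mirrors the dense debug numbering computed above.  */
      static int
      example_dbx_number (int kind, int idx, int n_input, int n_local)
      {
        if (kind == 0)
          return 32 + idx;                      /* in0, in1, ...   */
        if (kind == 1)
          return 32 + n_input + idx;            /* loc0, loc1, ... */
        return 32 + n_input + n_local + idx;    /* out0, out1, ... */
      }

So with 2 input and 3 local registers, in0..in1 map to debug registers 32..33, loc0..loc2 to 34..36, and out0 onward to 37 upward, regardless of which physical r32..r127 they occupy.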
4421 /* Implement TARGET_TRAMPOLINE_INIT.
4423 The trampoline should set the static chain pointer to value placed
4424 into the trampoline and should branch to the specified routine.
4425 To make the normal indirect-subroutine calling convention work,
4426 the trampoline must look like a function descriptor; the first
4427 word being the target address and the second being the target's
4428 global pointer.
4430 We abuse the concept of a global pointer by arranging for it
4431 to point to the data we need to load. The complete trampoline
4432 has the following form:
4434 +-------------------+ \
4435 TRAMP: | __ia64_trampoline | |
4436 +-------------------+ > fake function descriptor
4437 | TRAMP+16 | |
4438 +-------------------+ /
4439 | target descriptor |
4440 +-------------------+
4441 | static link |
4442 +-------------------+
4443 */
4445 static void
4446 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4448 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4449 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4451 /* The Intel assembler requires that the global __ia64_trampoline symbol
4452 be declared explicitly */
4453 if (!TARGET_GNU_AS)
4455 static bool declared_ia64_trampoline = false;
4457 if (!declared_ia64_trampoline)
4459 declared_ia64_trampoline = true;
4460 (*targetm.asm_out.globalize_label) (asm_out_file,
4461 "__ia64_trampoline");
4465 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4466 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4467 fnaddr = convert_memory_address (Pmode, fnaddr);
4468 static_chain = convert_memory_address (Pmode, static_chain);
4470 /* Load up our iterator. */
4471 addr_reg = copy_to_reg (addr);
4472 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4474 /* The first two words are the fake descriptor:
4475 __ia64_trampoline, ADDR+16. */
4476 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4477 if (TARGET_ABI_OPEN_VMS)
4479 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4480 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4481 relocation against function symbols to make it identical to the
4482 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4483 strict ELF and dereference to get the bare code address. */
4484 rtx reg = gen_reg_rtx (Pmode);
4485 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4486 emit_move_insn (reg, tramp);
4487 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4488 tramp = reg;
4490 emit_move_insn (m_tramp, tramp);
4491 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4492 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4494 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4495 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4496 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4498 /* The third word is the target descriptor. */
4499 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4500 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4501 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4503 /* The fourth word is the static chain. */
4504 emit_move_insn (m_tramp, static_chain);
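Viewed as plain data, the 32-byte trampoline initialized above has the shape sketched in the comment before this function; a hypothetical C rendering (struct and field names invented):

      struct ia64_trampoline_image
      {
        void *helper;        /* word 0: address of __ia64_trampoline       */
        void *helper_gp;     /* word 1: TRAMP+16, the fake descriptor's gp */
        void *target_fdesc;  /* word 2: descriptor of the real target      */
        void *static_link;   /* word 3: static chain value                 */
      };

The first two words form the fake function descriptor that callers branch through; __ia64_trampoline then uses that gp to load words 2 and 3, reaching the real target with the static chain in place.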
4507 /* Do any needed setup for a variadic function. CUM has not been updated
4508 for the last named argument which has type TYPE and mode MODE.
4510 We generate the actual spill instructions during prologue generation. */
4512 static void
4513 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4514 tree type, int * pretend_size,
4515 int second_time ATTRIBUTE_UNUSED)
4517 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4519 /* Skip the current argument. */
4520 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4522 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4524 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4525 *pretend_size = n * UNITS_PER_WORD;
4526 cfun->machine->n_varargs = n;
4530 /* Check whether TYPE is a homogeneous floating point aggregate. If
4531 it is, return the mode of the floating point type that appears
4532 in all leaves. If it is not, return VOIDmode.
4534 An aggregate is a homogeneous floating point aggregate if all
4535 fields/elements in it have the same floating point type (e.g.,
4536 SFmode). 128-bit quad-precision floats are excluded.
4538 Variable sized aggregates should never arrive here, since we should
4539 have already decided to pass them by reference. Top-level zero-sized
4540 aggregates are excluded because our parallels crash the middle-end. */
4542 static machine_mode
4543 hfa_element_mode (const_tree type, bool nested)
4545 machine_mode element_mode = VOIDmode;
4546 machine_mode mode;
4547 enum tree_code code = TREE_CODE (type);
4548 int know_element_mode = 0;
4549 tree t;
4551 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4552 return VOIDmode;
4554 switch (code)
4556 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4557 case BOOLEAN_TYPE: case POINTER_TYPE:
4558 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4559 case LANG_TYPE: case FUNCTION_TYPE:
4560 return VOIDmode;
4562 /* Fortran complex types are supposed to be HFAs, so we need to handle
4563 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4564 types though. */
4565 case COMPLEX_TYPE:
4566 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4567 && TYPE_MODE (type) != TCmode)
4568 return GET_MODE_INNER (TYPE_MODE (type));
4569 else
4570 return VOIDmode;
4572 case REAL_TYPE:
4573 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4574 mode if this is contained within an aggregate. */
4575 if (nested && TYPE_MODE (type) != TFmode)
4576 return TYPE_MODE (type);
4577 else
4578 return VOIDmode;
4580 case ARRAY_TYPE:
4581 return hfa_element_mode (TREE_TYPE (type), 1);
4583 case RECORD_TYPE:
4584 case UNION_TYPE:
4585 case QUAL_UNION_TYPE:
4586 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4588 if (TREE_CODE (t) != FIELD_DECL)
4589 continue;
4591 mode = hfa_element_mode (TREE_TYPE (t), 1);
4592 if (know_element_mode)
4594 if (mode != element_mode)
4595 return VOIDmode;
4597 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4598 return VOIDmode;
4599 else
4601 know_element_mode = 1;
4602 element_mode = mode;
4605 return element_mode;
4607 default:
4608 /* If we reach here, we probably have some front-end specific type
4609 that the backend doesn't know about. This can happen via the
4610 aggregate_value_p call in init_function_start. All we can do is
4611 ignore unknown tree types. */
4612 return VOIDmode;
4615 return VOIDmode;
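A few concrete, hypothetical types to make the rule above tangible:

      struct hfa_d   { double x, y, z; };     /* HFA: every leaf is DFmode       */
      struct hfa_sf  { float v[4]; };         /* HFA: every leaf is SFmode       */
      struct hfa_c   { _Complex double c; };  /* HFA: complex yields DFmode      */
      struct not_hfa { double d; long tag; }; /* mixed FP/integer: VOIDmode      */

Per the checks above, a quad-precision (TFmode) member or any non-floating leaf makes hfa_element_mode return VOIDmode.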
4618 /* Return the number of words required to hold a quantity of TYPE and MODE
4619 when passed as an argument. */
4620 static int
4621 ia64_function_arg_words (const_tree type, machine_mode mode)
4623 int words;
4625 if (mode == BLKmode)
4626 words = int_size_in_bytes (type);
4627 else
4628 words = GET_MODE_SIZE (mode);
4630 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
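A quick worked instance of this rounding, assuming UNITS_PER_WORD is 8 on this target: a 12-byte BLKmode aggregate needs (12 + 7) / 8 = 2 argument words, a DImode scalar (8 + 7) / 8 = 1, and a 17-byte aggregate (17 + 7) / 8 = 3.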
4633 /* Return the number of registers that should be skipped so the current
4634 argument (described by TYPE and WORDS) will be properly aligned.
4636 Integer and float arguments larger than 8 bytes start at the next
4637 even boundary. Aggregates larger than 8 bytes start at the next
4638 even boundary if the aggregate has 16 byte alignment. Note that
4639 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4640 but are still to be aligned in registers.
4642 ??? The ABI does not specify how to handle aggregates with
4643 alignment from 9 to 15 bytes, or greater than 16. We handle them
4644 all as if they had 16 byte alignment. Such aggregates can occur
4645 only if gcc extensions are used. */
4646 static int
4647 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4648 const_tree type, int words)
4650 /* No registers are skipped on VMS. */
4651 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4652 return 0;
4654 if (type
4655 && TREE_CODE (type) != INTEGER_TYPE
4656 && TREE_CODE (type) != REAL_TYPE)
4657 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4658 else
4659 return words > 1;
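Worked examples of the skip rule, assuming 8-byte argument slots: when cum->words is even, nothing is ever skipped.  When cum->words is odd (say 3), a TImode integer (words == 2) or an aggregate with 16-byte alignment returns 1 and therefore starts in slot 4, while an 8-byte scalar or an 8-byte-aligned struct returns 0 and starts in slot 3.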
4662 /* Return rtx for register where argument is passed, or zero if it is passed
4663 on the stack. */
4664 /* ??? 128-bit quad-precision floats are always passed in general
4665 registers. */
4667 static rtx
4668 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4669 const_tree type, bool named, bool incoming)
4671 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4673 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4674 int words = ia64_function_arg_words (type, mode);
4675 int offset = ia64_function_arg_offset (cum, type, words);
4676 machine_mode hfa_mode = VOIDmode;
4678 /* For OPEN VMS, emit the instruction setting up the argument register here,
4679 when we know it will be emitted together with the other argument-setup
4680 related insns. This is not the conceptually best place to do this, but it is
4681 the easiest as we have convenient access to cumulative args info. */
4683 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4684 && named == 1)
4686 unsigned HOST_WIDE_INT regval = cum->words;
4687 int i;
4689 for (i = 0; i < 8; i++)
4690 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4692 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4693 GEN_INT (regval));
4696 /* If all argument slots are used, then it must go on the stack. */
4697 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4698 return 0;
4700 /* On OpenVMS argument is either in Rn or Fn. */
4701 if (TARGET_ABI_OPEN_VMS)
4703 if (FLOAT_MODE_P (mode))
4704 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4705 else
4706 return gen_rtx_REG (mode, basereg + cum->words);
4709 /* Check for and handle homogeneous FP aggregates. */
4710 if (type)
4711 hfa_mode = hfa_element_mode (type, 0);
4713 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4714 and unprototyped hfas are passed specially. */
4715 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4717 rtx loc[16];
4718 int i = 0;
4719 int fp_regs = cum->fp_regs;
4720 int int_regs = cum->words + offset;
4721 int hfa_size = GET_MODE_SIZE (hfa_mode);
4722 int byte_size;
4723 int args_byte_size;
4725 /* If prototyped, pass it in FR regs then GR regs.
4726 If not prototyped, pass it in both FR and GR regs.
4728 If this is an SFmode aggregate, then it is possible to run out of
4729 FR regs while GR regs are still left. In that case, we pass the
4730 remaining part in the GR regs. */
4732 /* Fill the FP regs. We do this always. We stop if we reach the end
4733 of the argument, the last FP register, or the last argument slot. */
4735 byte_size = ((mode == BLKmode)
4736 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4737 args_byte_size = int_regs * UNITS_PER_WORD;
4738 offset = 0;
4739 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4740 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4742 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4743 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4744 + fp_regs)),
4745 GEN_INT (offset));
4746 offset += hfa_size;
4747 args_byte_size += hfa_size;
4748 fp_regs++;
4751 /* If no prototype, then the whole thing must go in GR regs. */
4752 if (! cum->prototype)
4753 offset = 0;
4754 /* If this is an SFmode aggregate, then we might have some left over
4755 that needs to go in GR regs. */
4756 else if (byte_size != offset)
4757 int_regs += offset / UNITS_PER_WORD;
4759 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4761 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4763 machine_mode gr_mode = DImode;
4764 unsigned int gr_size;
4766 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4767 then this goes in a GR reg left adjusted/little endian, right
4768 adjusted/big endian. */
4769 /* ??? Currently this is handled wrong, because 4-byte hunks are
4770 always right adjusted/little endian. */
4771 if (offset & 0x4)
4772 gr_mode = SImode;
4773 /* If we have an even 4 byte hunk because the aggregate is a
4774 multiple of 4 bytes in size, then this goes in a GR reg right
4775 adjusted/little endian. */
4776 else if (byte_size - offset == 4)
4777 gr_mode = SImode;
4779 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4780 gen_rtx_REG (gr_mode, (basereg
4781 + int_regs)),
4782 GEN_INT (offset));
4784 gr_size = GET_MODE_SIZE (gr_mode);
4785 offset += gr_size;
4786 if (gr_size == UNITS_PER_WORD
4787 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4788 int_regs++;
4789 else if (gr_size > UNITS_PER_WORD)
4790 int_regs += gr_size / UNITS_PER_WORD;
4792 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4795 /* Integral and aggregates go in general registers. If we have run out of
4796 FR registers, then FP values must also go in general registers. This can
4797 happen when we have a SFmode HFA. */
4798 else if (mode == TFmode || mode == TCmode
4799 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4801 int byte_size = ((mode == BLKmode)
4802 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4803 if (BYTES_BIG_ENDIAN
4804 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4805 && byte_size < UNITS_PER_WORD
4806 && byte_size > 0)
4808 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4809 gen_rtx_REG (DImode,
4810 (basereg + cum->words
4811 + offset)),
4812 const0_rtx);
4813 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4815 else
4816 return gen_rtx_REG (mode, basereg + cum->words + offset);
4820 /* If there is a prototype, then FP values go in a FR register when
4821 named, and in a GR register when unnamed. */
4822 else if (cum->prototype)
4824 if (named)
4825 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4826 /* In big-endian mode, an anonymous SFmode value must be represented
4827 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4828 the value into the high half of the general register. */
4829 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4830 return gen_rtx_PARALLEL (mode,
4831 gen_rtvec (1,
4832 gen_rtx_EXPR_LIST (VOIDmode,
4833 gen_rtx_REG (DImode, basereg + cum->words + offset),
4834 const0_rtx)));
4835 else
4836 return gen_rtx_REG (mode, basereg + cum->words + offset);
4838 /* If there is no prototype, then FP values go in both FR and GR
4839 registers. */
4840 else
4842 /* See comment above. */
4843 machine_mode inner_mode =
4844 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4846 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4847 gen_rtx_REG (mode, (FR_ARG_FIRST
4848 + cum->fp_regs)),
4849 const0_rtx);
4850 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4851 gen_rtx_REG (inner_mode,
4852 (basereg + cum->words
4853 + offset)),
4854 const0_rtx);
4856 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4860 /* Implement TARGET_FUNCTION_ARG target hook. */
4862 static rtx
4863 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4864 const_tree type, bool named)
4866 return ia64_function_arg_1 (cum, mode, type, named, false);
4869 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4871 static rtx
4872 ia64_function_incoming_arg (cumulative_args_t cum,
4873 machine_mode mode,
4874 const_tree type, bool named)
4876 return ia64_function_arg_1 (cum, mode, type, named, true);
4879 /* Return number of bytes, at the beginning of the argument, that must be
4880 put in registers. 0 if the argument is entirely in registers or entirely
4881 in memory. */
4883 static int
4884 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4885 tree type, bool named ATTRIBUTE_UNUSED)
4887 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4889 int words = ia64_function_arg_words (type, mode);
4890 int offset = ia64_function_arg_offset (cum, type, words);
4892 /* If all argument slots are used, then it must go on the stack. */
4893 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4894 return 0;
4896 /* It doesn't matter whether the argument goes in FR or GR regs. If
4897 it fits within the 8 argument slots, then it goes entirely in
4898 registers. If it extends past the last argument slot, then the rest
4899 goes on the stack. */
4901 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4902 return 0;
4904 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
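A worked case, assuming MAX_ARGUMENT_SLOTS is 8 and UNITS_PER_WORD is 8 (the usual values for this target): with cum->words == 6, offset == 0 and a 32-byte aggregate (words == 4), the argument is neither entirely on the stack (6 < 8) nor entirely in registers (4 + 6 > 8), so the function returns (8 - 6) * 8 = 16; the first 16 bytes occupy the last two argument slots and the remaining 16 bytes go to the stack.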
4907 /* Return ivms_arg_type based on machine_mode. */
4909 static enum ivms_arg_type
4910 ia64_arg_type (machine_mode mode)
4912 switch (mode)
4914 case SFmode:
4915 return FS;
4916 case DFmode:
4917 return FT;
4918 default:
4919 return I64;
4923 /* Update CUM to point after this argument. This is patterned after
4924 ia64_function_arg. */
4926 static void
4927 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4928 const_tree type, bool named)
4930 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4931 int words = ia64_function_arg_words (type, mode);
4932 int offset = ia64_function_arg_offset (cum, type, words);
4933 machine_mode hfa_mode = VOIDmode;
4935 /* If all arg slots are already full, then there is nothing to do. */
4936 if (cum->words >= MAX_ARGUMENT_SLOTS)
4938 cum->words += words + offset;
4939 return;
4942 cum->atypes[cum->words] = ia64_arg_type (mode);
4943 cum->words += words + offset;
4945 /* On OpenVMS argument is either in Rn or Fn. */
4946 if (TARGET_ABI_OPEN_VMS)
4948 cum->int_regs = cum->words;
4949 cum->fp_regs = cum->words;
4950 return;
4953 /* Check for and handle homogeneous FP aggregates. */
4954 if (type)
4955 hfa_mode = hfa_element_mode (type, 0);
4957 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4958 and unprototyped hfas are passed specially. */
4959 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4961 int fp_regs = cum->fp_regs;
4962 /* This is the original value of cum->words + offset. */
4963 int int_regs = cum->words - words;
4964 int hfa_size = GET_MODE_SIZE (hfa_mode);
4965 int byte_size;
4966 int args_byte_size;
4968 /* If prototyped, pass it in FR regs then GR regs.
4969 If not prototyped, pass it in both FR and GR regs.
4971 If this is an SFmode aggregate, then it is possible to run out of
4972 FR regs while GR regs are still left. In that case, we pass the
4973 remaining part in the GR regs. */
4975 /* Fill the FP regs. We do this always. We stop if we reach the end
4976 of the argument, the last FP register, or the last argument slot. */
4978 byte_size = ((mode == BLKmode)
4979 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4980 args_byte_size = int_regs * UNITS_PER_WORD;
4981 offset = 0;
4982 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4983 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4985 offset += hfa_size;
4986 args_byte_size += hfa_size;
4987 fp_regs++;
4990 cum->fp_regs = fp_regs;
4993 /* Integral and aggregates go in general registers. So do TFmode FP values.
4994 If we have run out of FR registers, then other FP values must also go in
4995 general registers. This can happen when we have a SFmode HFA. */
4996 else if (mode == TFmode || mode == TCmode
4997 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4998 cum->int_regs = cum->words;
5000 /* If there is a prototype, then FP values go in a FR register when
5001 named, and in a GR register when unnamed. */
5002 else if (cum->prototype)
5004 if (! named)
5005 cum->int_regs = cum->words;
5006 else
5007 /* ??? Complex types should not reach here. */
5008 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5010 /* If there is no prototype, then FP values go in both FR and GR
5011 registers. */
5012 else
5014 /* ??? Complex types should not reach here. */
5015 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5016 cum->int_regs = cum->words;
5020 /* Arguments with alignment larger than 8 bytes start at the next even
5021 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5022 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5024 static unsigned int
5025 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5027 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5028 return PARM_BOUNDARY * 2;
5030 if (type)
5032 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5033 return PARM_BOUNDARY * 2;
5034 else
5035 return PARM_BOUNDARY;
5038 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5039 return PARM_BOUNDARY * 2;
5040 else
5041 return PARM_BOUNDARY;
5044 /* True if it is OK to do sibling call optimization for the specified
5045 call expression EXP. DECL will be the called function, or NULL if
5046 this is an indirect call. */
5047 static bool
5048 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5050 /* We can't perform a sibcall if the current function has the syscall_linkage
5051 attribute. */
5052 if (lookup_attribute ("syscall_linkage",
5053 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5054 return false;
5056 /* We must always return with our current GP. This means we can
5057 only sibcall to functions defined in the current module unless
5058 TARGET_CONST_GP is set to true. */
5059 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5063 /* Implement va_arg. */
5065 static tree
5066 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5067 gimple_seq *post_p)
5069 /* Variable sized types are passed by reference. */
5070 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5072 tree ptrtype = build_pointer_type (type);
5073 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5074 return build_va_arg_indirect_ref (addr);
5077 /* Aggregate arguments with alignment larger than 8 bytes start at
5078 the next even boundary. Integer and floating point arguments
5079 do so if they are larger than 8 bytes, whether or not they are
5080 also aligned larger than 8 bytes. */
5081 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5082 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5084 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5085 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5086 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5087 gimplify_assign (unshare_expr (valist), t, pre_p);
5090 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
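The alignment fix-up above is the usual round-up idiom; in plain C it amounts to the following, where ap stands for the va_list pointer (assuming UNITS_PER_WORD is 8, so the constants are 15 and -16, and uintptr_t comes from <stdint.h>):

      ap = (char *) (((uintptr_t) ap + 15) & ~(uintptr_t) 15);

that is, the pointer is bumped to the next 16-byte boundary before the standard va_arg expansion runs.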
5093 /* Return 1 if the function return value is returned in memory. Return 0 if it is
5094 in a register. */
5096 static bool
5097 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5099 machine_mode mode;
5100 machine_mode hfa_mode;
5101 HOST_WIDE_INT byte_size;
5103 mode = TYPE_MODE (valtype);
5104 byte_size = GET_MODE_SIZE (mode);
5105 if (mode == BLKmode)
5107 byte_size = int_size_in_bytes (valtype);
5108 if (byte_size < 0)
5109 return true;
5112 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5114 hfa_mode = hfa_element_mode (valtype, 0);
5115 if (hfa_mode != VOIDmode)
5117 int hfa_size = GET_MODE_SIZE (hfa_mode);
5119 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5120 return true;
5121 else
5122 return false;
5124 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5125 return true;
5126 else
5127 return false;
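Some illustrative cases (type names invented; this assumes MAX_ARGUMENT_SLOTS is 8 and MAX_INT_RETURN_SLOTS covers four 8-byte words, r8-r11, as is usual for this target):

      struct s1 { double d[4]; };   /* HFA of 4 DFmode elements: FP registers */
      struct s2 { double d[10]; };  /* HFA, but 10 > 8 elements: memory       */
      struct s3 { long l[3]; };     /* 24-byte non-HFA: integer return slots  */
      struct s4 { long l[5]; };     /* 40-byte non-HFA: returned in memory    */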
5130 /* Return rtx for register that holds the function return value. */
5132 static rtx
5133 ia64_function_value (const_tree valtype,
5134 const_tree fn_decl_or_type,
5135 bool outgoing ATTRIBUTE_UNUSED)
5137 machine_mode mode;
5138 machine_mode hfa_mode;
5139 int unsignedp;
5140 const_tree func = fn_decl_or_type;
5142 if (fn_decl_or_type
5143 && !DECL_P (fn_decl_or_type))
5144 func = NULL;
5146 mode = TYPE_MODE (valtype);
5147 hfa_mode = hfa_element_mode (valtype, 0);
5149 if (hfa_mode != VOIDmode)
5151 rtx loc[8];
5152 int i;
5153 int hfa_size;
5154 int byte_size;
5155 int offset;
5157 hfa_size = GET_MODE_SIZE (hfa_mode);
5158 byte_size = ((mode == BLKmode)
5159 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5160 offset = 0;
5161 for (i = 0; offset < byte_size; i++)
5163 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5164 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5165 GEN_INT (offset));
5166 offset += hfa_size;
5168 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5170 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5171 return gen_rtx_REG (mode, FR_ARG_FIRST);
5172 else
5174 bool need_parallel = false;
5176 /* In big-endian mode, we need to manage the layout of aggregates
5177 in the registers so that we get the bits properly aligned in
5178 the highpart of the registers. */
5179 if (BYTES_BIG_ENDIAN
5180 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5181 need_parallel = true;
5183 /* Something like struct S { long double x; char a[0] } is not an
5184 HFA structure, and therefore doesn't go in fp registers. But
5185 the middle-end will give it XFmode anyway, and XFmode values
5186 don't normally fit in integer registers. So we need to smuggle
5187 the value inside a parallel. */
5188 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5189 need_parallel = true;
5191 if (need_parallel)
5193 rtx loc[8];
5194 int offset;
5195 int bytesize;
5196 int i;
5198 offset = 0;
5199 bytesize = int_size_in_bytes (valtype);
5200 /* An empty PARALLEL is invalid here, but the return value
5201 doesn't matter for empty structs. */
5202 if (bytesize == 0)
5203 return gen_rtx_REG (mode, GR_RET_FIRST);
5204 for (i = 0; offset < bytesize; i++)
5206 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5207 gen_rtx_REG (DImode,
5208 GR_RET_FIRST + i),
5209 GEN_INT (offset));
5210 offset += UNITS_PER_WORD;
5212 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5215 mode = promote_function_mode (valtype, mode, &unsignedp,
5216 func ? TREE_TYPE (func) : NULL_TREE,
5217 true);
5219 return gen_rtx_REG (mode, GR_RET_FIRST);
5223 /* Worker function for TARGET_LIBCALL_VALUE. */
5225 static rtx
5226 ia64_libcall_value (machine_mode mode,
5227 const_rtx fun ATTRIBUTE_UNUSED)
5229 return gen_rtx_REG (mode,
5230 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5231 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5232 && (mode) != TFmode)
5233 ? FR_RET_FIRST : GR_RET_FIRST));
5236 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5238 static bool
5239 ia64_function_value_regno_p (const unsigned int regno)
5241 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5242 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5245 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5246 We need to emit DTP-relative relocations. */
5248 static void
5249 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5251 gcc_assert (size == 4 || size == 8);
5252 if (size == 4)
5253 fputs ("\tdata4.ua\t@dtprel(", file);
5254 else
5255 fputs ("\tdata8.ua\t@dtprel(", file);
5256 output_addr_const (file, x);
5257 fputs (")", file);
5260 /* Print a memory address as an operand to reference that memory location. */
5262 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5263 also call this from ia64_print_operand for memory addresses. */
5265 static void
5266 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5267 rtx address ATTRIBUTE_UNUSED)
5271 /* Print an operand to an assembler instruction.
5272 C Swap and print a comparison operator.
5273 D Print an FP comparison operator.
5274 E Print 32 - constant, for SImode shifts as extract.
5275 e Print 64 - constant, for DImode rotates.
5276 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5277 a floating point register emitted normally.
5278 G A floating point constant.
5279 I Invert a predicate register by adding 1.
5280 J Select the proper predicate register for a condition.
5281 j Select the inverse predicate register for a condition.
5282 O Append .acq for volatile load.
5283 P Postincrement of a MEM.
5284 Q Append .rel for volatile store.
5285 R Print .s .d or nothing for a single, double or no truncation.
5286 S Shift amount for shladd instruction.
5287 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5288 for Intel assembler.
5289 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5290 for Intel assembler.
5291 X A pair of floating point registers.
5292 r Print register name, or constant 0 as r0. HP compatibility for
5293 Linux kernel.
5294 v Print vector constant value as an 8-byte integer value. */
5296 static void
5297 ia64_print_operand (FILE * file, rtx x, int code)
5299 const char *str;
5301 switch (code)
5303 case 0:
5304 /* Handled below. */
5305 break;
5307 case 'C':
5309 enum rtx_code c = swap_condition (GET_CODE (x));
5310 fputs (GET_RTX_NAME (c), file);
5311 return;
5314 case 'D':
5315 switch (GET_CODE (x))
5317 case NE:
5318 str = "neq";
5319 break;
5320 case UNORDERED:
5321 str = "unord";
5322 break;
5323 case ORDERED:
5324 str = "ord";
5325 break;
5326 case UNLT:
5327 str = "nge";
5328 break;
5329 case UNLE:
5330 str = "ngt";
5331 break;
5332 case UNGT:
5333 str = "nle";
5334 break;
5335 case UNGE:
5336 str = "nlt";
5337 break;
5338 case UNEQ:
5339 case LTGT:
5340 gcc_unreachable ();
5341 default:
5342 str = GET_RTX_NAME (GET_CODE (x));
5343 break;
5345 fputs (str, file);
5346 return;
5348 case 'E':
5349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5350 return;
5352 case 'e':
5353 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5354 return;
5356 case 'F':
5357 if (x == CONST0_RTX (GET_MODE (x)))
5358 str = reg_names [FR_REG (0)];
5359 else if (x == CONST1_RTX (GET_MODE (x)))
5360 str = reg_names [FR_REG (1)];
5361 else
5363 gcc_assert (GET_CODE (x) == REG);
5364 str = reg_names [REGNO (x)];
5366 fputs (str, file);
5367 return;
5369 case 'G':
5371 long val[4];
5372 REAL_VALUE_TYPE rv;
5373 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5374 real_to_target (val, &rv, GET_MODE (x));
5375 if (GET_MODE (x) == SFmode)
5376 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5377 else if (GET_MODE (x) == DFmode)
5378 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5379 & 0xffffffff,
5380 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5381 & 0xffffffff);
5382 else
5383 output_operand_lossage ("invalid %%G mode");
5385 return;
5387 case 'I':
5388 fputs (reg_names [REGNO (x) + 1], file);
5389 return;
5391 case 'J':
5392 case 'j':
5394 unsigned int regno = REGNO (XEXP (x, 0));
5395 if (GET_CODE (x) == EQ)
5396 regno += 1;
5397 if (code == 'j')
5398 regno ^= 1;
5399 fputs (reg_names [regno], file);
5401 return;
5403 case 'O':
5404 if (MEM_VOLATILE_P (x))
5405 fputs(".acq", file);
5406 return;
5408 case 'P':
5410 HOST_WIDE_INT value;
5412 switch (GET_CODE (XEXP (x, 0)))
5414 default:
5415 return;
5417 case POST_MODIFY:
5418 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5419 if (GET_CODE (x) == CONST_INT)
5420 value = INTVAL (x);
5421 else
5423 gcc_assert (GET_CODE (x) == REG);
5424 fprintf (file, ", %s", reg_names[REGNO (x)]);
5425 return;
5427 break;
5429 case POST_INC:
5430 value = GET_MODE_SIZE (GET_MODE (x));
5431 break;
5433 case POST_DEC:
5434 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5435 break;
5438 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5439 return;
5442 case 'Q':
5443 if (MEM_VOLATILE_P (x))
5444 fputs(".rel", file);
5445 return;
5447 case 'R':
5448 if (x == CONST0_RTX (GET_MODE (x)))
5449 fputs(".s", file);
5450 else if (x == CONST1_RTX (GET_MODE (x)))
5451 fputs(".d", file);
5452 else if (x == CONST2_RTX (GET_MODE (x)))
5454 else
5455 output_operand_lossage ("invalid %%R value");
5456 return;
5458 case 'S':
5459 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5460 return;
5462 case 'T':
5463 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5465 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5466 return;
5468 break;
5470 case 'U':
5471 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5473 const char *prefix = "0x";
5474 if (INTVAL (x) & 0x80000000)
5476 fprintf (file, "0xffffffff");
5477 prefix = "";
5479 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5480 return;
5482 break;
5484 case 'X':
5486 unsigned int regno = REGNO (x);
5487 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5489 return;
5491 case 'r':
5492 /* If this operand is the constant zero, write it as register zero.
5493 Any register, zero, or CONST_INT value is OK here. */
5494 if (GET_CODE (x) == REG)
5495 fputs (reg_names[REGNO (x)], file);
5496 else if (x == CONST0_RTX (GET_MODE (x)))
5497 fputs ("r0", file);
5498 else if (GET_CODE (x) == CONST_INT)
5499 output_addr_const (file, x);
5500 else
5501 output_operand_lossage ("invalid %%r value");
5502 return;
5504 case 'v':
5505 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5506 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5507 break;
5509 case '+':
5511 const char *which;
5513 /* For conditional branches, returns or calls, substitute
5514 sptk, dptk, dpnt, or spnt for %s. */
5515 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5516 if (x)
5518 int pred_val = XINT (x, 0);
5520 /* Guess top and bottom 2% statically predicted. */
5521 if (pred_val < REG_BR_PROB_BASE / 50
5522 && br_prob_note_reliable_p (x))
5523 which = ".spnt";
5524 else if (pred_val < REG_BR_PROB_BASE / 2)
5525 which = ".dpnt";
5526 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5527 || !br_prob_note_reliable_p (x))
5528 which = ".dptk";
5529 else
5530 which = ".sptk";
5532 else if (CALL_P (current_output_insn))
5533 which = ".sptk";
5534 else
5535 which = ".dptk";
5537 fputs (which, file);
5538 return;
5541 case ',':
5542 x = current_insn_predicate;
5543 if (x)
5545 unsigned int regno = REGNO (XEXP (x, 0));
5546 if (GET_CODE (x) == EQ)
5547 regno += 1;
5548 fprintf (file, "(%s) ", reg_names [regno]);
5550 return;
5552 default:
5553 output_operand_lossage ("ia64_print_operand: unknown code");
5554 return;
5557 switch (GET_CODE (x))
5559 /* This happens for the spill/restore instructions. */
5560 case POST_INC:
5561 case POST_DEC:
5562 case POST_MODIFY:
5563 x = XEXP (x, 0);
5564 /* ... fall through ... */
5566 case REG:
5567 fputs (reg_names [REGNO (x)], file);
5568 break;
5570 case MEM:
5572 rtx addr = XEXP (x, 0);
5573 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5574 addr = XEXP (addr, 0);
5575 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5576 break;
5579 default:
5580 output_addr_const (file, x);
5581 break;
5584 return;
5587 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5589 static bool
5590 ia64_print_operand_punct_valid_p (unsigned char code)
5592 return (code == '+' || code == ',');
5595 /* Compute a (partial) cost for rtx X. Return true if the complete
5596 cost has been computed, and false if subexpressions should be
5597 scanned. In either case, *TOTAL contains the cost result. */
5598 /* ??? This is incomplete. */
5600 static bool
5601 ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5602 int *total, bool speed ATTRIBUTE_UNUSED)
5604 switch (code)
5606 case CONST_INT:
5607 switch (outer_code)
5609 case SET:
5610 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5611 return true;
5612 case PLUS:
5613 if (satisfies_constraint_I (x))
5614 *total = 0;
5615 else if (satisfies_constraint_J (x))
5616 *total = 1;
5617 else
5618 *total = COSTS_N_INSNS (1);
5619 return true;
5620 default:
5621 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5622 *total = 0;
5623 else
5624 *total = COSTS_N_INSNS (1);
5625 return true;
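      /* For example (assuming the usual ia64 constraint meanings, where
	 'J' is the 22-bit addl immediate range): a SET of a constant
	 that addl can materialize is treated as free above, while a
	 larger constant is charged one insn for the movl needed to
	 load it.  */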
5628 case CONST_DOUBLE:
5629 *total = COSTS_N_INSNS (1);
5630 return true;
5632 case CONST:
5633 case SYMBOL_REF:
5634 case LABEL_REF:
5635 *total = COSTS_N_INSNS (3);
5636 return true;
5638 case FMA:
5639 *total = COSTS_N_INSNS (4);
5640 return true;
5642 case MULT:
5643 /* For multiplies wider than HImode, we have to go to the FPU,
5644 which normally involves copies. Plus there's the latency
5645 of the multiply itself, and the latency of the instructions to
5646 transfer integer regs to FP regs. */
5647 if (FLOAT_MODE_P (GET_MODE (x)))
5648 *total = COSTS_N_INSNS (4);
5649 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5650 *total = COSTS_N_INSNS (10);
5651 else
5652 *total = COSTS_N_INSNS (2);
5653 return true;
5655 case PLUS:
5656 case MINUS:
5657 if (FLOAT_MODE_P (GET_MODE (x)))
5659 *total = COSTS_N_INSNS (4);
5660 return true;
5662 /* FALLTHRU */
5664 case ASHIFT:
5665 case ASHIFTRT:
5666 case LSHIFTRT:
5667 *total = COSTS_N_INSNS (1);
5668 return true;
5670 case DIV:
5671 case UDIV:
5672 case MOD:
5673 case UMOD:
5674 /* We make divide expensive, so that divide-by-constant will be
5675 optimized to a multiply. */
5676 *total = COSTS_N_INSNS (60);
5677 return true;
5679 default:
5680 return false;
5684 /* Calculate the cost of moving data from a register in class FROM to
5685 one in class TO, using MODE. */
5687 static int
5688 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5689 reg_class_t to)
5691 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5692 if (to == ADDL_REGS)
5693 to = GR_REGS;
5694 if (from == ADDL_REGS)
5695 from = GR_REGS;
5697 /* All costs are symmetric, so reduce cases by putting the
5698 lower-numbered class as the destination. */
5699 if (from < to)
5701 reg_class_t tmp = to;
5702 to = from, from = tmp;
5705 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5706 so that we get secondary memory reloads. Between FR_REGS,
5707 we have to make this at least as expensive as memory_move_cost
5708 to avoid spectacularly poor register class preferencing. */
5709 if (mode == XFmode || mode == RFmode)
5711 if (to != GR_REGS || from != GR_REGS)
5712 return memory_move_cost (mode, to, false);
5713 else
5714 return 3;
5717 switch (to)
5719 case PR_REGS:
5720 /* Moving between PR registers takes two insns. */
5721 if (from == PR_REGS)
5722 return 3;
5723 /* Moving between PR and anything but GR is impossible. */
5724 if (from != GR_REGS)
5725 return memory_move_cost (mode, to, false);
5726 break;
5728 case BR_REGS:
5729 /* Moving between BR and anything but GR is impossible. */
5730 if (from != GR_REGS && from != GR_AND_BR_REGS)
5731 return memory_move_cost (mode, to, false);
5732 break;
5734 case AR_I_REGS:
5735 case AR_M_REGS:
5736 /* Moving between AR and anything but GR is impossible. */
5737 if (from != GR_REGS)
5738 return memory_move_cost (mode, to, false);
5739 break;
5741 case GR_REGS:
5742 case FR_REGS:
5743 case FP_REGS:
5744 case GR_AND_FR_REGS:
5745 case GR_AND_BR_REGS:
5746 case ALL_REGS:
5747 break;
5749 default:
5750 gcc_unreachable ();
5753 return 2;
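/* Worked example for the function above: an XFmode copy between FR_REGS
   and GR_REGS (or between two FR_REGS) is costed like a memory move,
   which is what pushes reload toward using a secondary memory location
   instead of a direct register-to-register copy.  */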
5756 /* Calculate the cost of moving data of MODE from a register to or from
5757 memory. */
5759 static int
5760 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5761 reg_class_t rclass,
5762 bool in ATTRIBUTE_UNUSED)
5764 if (rclass == GENERAL_REGS
5765 || rclass == FR_REGS
5766 || rclass == FP_REGS
5767 || rclass == GR_AND_FR_REGS)
5768 return 4;
5769 else
5770 return 10;
5773 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5774 on RCLASS to use when copying X into that class. */
5776 static reg_class_t
5777 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5779 switch (rclass)
5781 case FR_REGS:
5782 case FP_REGS:
5783 /* Don't allow volatile mem reloads into floating point registers.
5784 This is defined to force reload to choose the r/m case instead
5785 of the f/f case when reloading (set (reg fX) (mem/v)). */
5786 if (MEM_P (x) && MEM_VOLATILE_P (x))
5787 return NO_REGS;
5789 /* Force all unrecognized constants into the constant pool. */
5790 if (CONSTANT_P (x))
5791 return NO_REGS;
5792 break;
5794 case AR_M_REGS:
5795 case AR_I_REGS:
5796 if (!OBJECT_P (x))
5797 return NO_REGS;
5798 break;
5800 default:
5801 break;
5804 return rclass;
5807 /* This function returns the register class required for a secondary
5808 register when copying between one of the registers in RCLASS, and X,
5809 using MODE. A return value of NO_REGS means that no secondary register
5810 is required. */
5812 enum reg_class
5813 ia64_secondary_reload_class (enum reg_class rclass,
5814 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5816 int regno = -1;
5818 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5819 regno = true_regnum (x);
5821 switch (rclass)
5823 case BR_REGS:
5824 case AR_M_REGS:
5825 case AR_I_REGS:
5826 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5827 interaction. We end up with two pseudos with overlapping lifetimes
5828 both of which are equiv to the same constant, and both of which need
5829 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5830 changes depending on the path length, which means the qty_first_reg
5831 check in make_regs_eqv can give different answers at different times.
5832 At some point I'll probably need a reload_indi pattern to handle
5833 this.
5835 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5836 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5837 non-general registers for good measure. */
5838 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5839 return GR_REGS;
5841 /* This is needed if a pseudo used as a call_operand gets spilled to a
5842 stack slot. */
5843 if (GET_CODE (x) == MEM)
5844 return GR_REGS;
5845 break;
5847 case FR_REGS:
5848 case FP_REGS:
5849 /* Need to go through general registers to get to other class regs. */
5850 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5851 return GR_REGS;
5853 /* This can happen when a paradoxical subreg is an operand to the
5854 muldi3 pattern. */
5855 /* ??? This shouldn't be necessary after instruction scheduling is
5856 enabled, because paradoxical subregs are not accepted by
5857 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5858 stop the paradoxical subreg stupidity in the *_operand functions
5859 in recog.c. */
5860 if (GET_CODE (x) == MEM
5861 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5862 || GET_MODE (x) == QImode))
5863 return GR_REGS;
5865 /* This can happen because of the ior/and/etc patterns that accept FP
5866 registers as operands. If the third operand is a constant, then it
5867 needs to be reloaded into a FP register. */
5868 if (GET_CODE (x) == CONST_INT)
5869 return GR_REGS;
5871 /* This can happen because of register elimination in a muldi3 insn.
5872 E.g. `26107 * (unsigned long)&u'. */
5873 if (GET_CODE (x) == PLUS)
5874 return GR_REGS;
5875 break;
5877 case PR_REGS:
5878 /* ??? This happens if we cse/gcse a BImode value across a call,
5879 and the function has a nonlocal goto. This is because global
5880 does not allocate call crossing pseudos to hard registers when
5881 crtl->has_nonlocal_goto is true. This is relatively
5882 common for C++ programs that use exceptions. To reproduce,
5883 return NO_REGS and compile libstdc++. */
5884 if (GET_CODE (x) == MEM)
5885 return GR_REGS;
5887 /* This can happen when we take a BImode subreg of a DImode value,
5888 and that DImode value winds up in some non-GR register. */
5889 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5890 return GR_REGS;
5891 break;
5893 default:
5894 break;
5897 return NO_REGS;
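/* Example of the FR_REGS/FP_REGS CONST_INT case above: to load an
   integer constant into a floating-point register, the constant is
   first placed in a general register and then transferred (e.g. with a
   setf instruction), so GR_REGS is returned as the secondary class.  */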
5901 /* Implement targetm.unspec_may_trap_p hook. */
5902 static int
5903 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5905 switch (XINT (x, 1))
5907 case UNSPEC_LDA:
5908 case UNSPEC_LDS:
5909 case UNSPEC_LDSA:
5910 case UNSPEC_LDCCLR:
5911 case UNSPEC_CHKACLR:
5912 case UNSPEC_CHKS:
5913 /* These unspecs are just wrappers. */
5914 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5917 return default_unspec_may_trap_p (x, flags);
5921 /* Parse the -mfixed-range= option string. */
5923 static void
5924 fix_range (const char *const_str)
5926 int i, first, last;
5927 char *str, *dash, *comma;
5929 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5930 REG2 are either register names or register numbers. The effect
5931 of this option is to mark the registers in the range from REG1 to
5932 REG2 as ``fixed'' so they won't be used by the compiler. This is
5933 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5935 i = strlen (const_str);
5936 str = (char *) alloca (i + 1);
5937 memcpy (str, const_str, i + 1);
5939 while (1)
5941 dash = strchr (str, '-');
5942 if (!dash)
5944 warning (0, "value of -mfixed-range must have form REG1-REG2");
5945 return;
5947 *dash = '\0';
5949 comma = strchr (dash + 1, ',');
5950 if (comma)
5951 *comma = '\0';
5953 first = decode_reg_name (str);
5954 if (first < 0)
5956 warning (0, "unknown register name: %s", str);
5957 return;
5960 last = decode_reg_name (dash + 1);
5961 if (last < 0)
5963 warning (0, "unknown register name: %s", dash + 1);
5964 return;
5967 *dash = '-';
5969 if (first > last)
5971 warning (0, "%s-%s is an empty range", str, dash + 1);
5972 return;
5975 for (i = first; i <= last; ++i)
5976 fixed_regs[i] = call_used_regs[i] = 1;
5978 if (!comma)
5979 break;
5981 *comma = ',';
5982 str = comma + 1;
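/* Usage sketch for the option parsed above: -mfixed-range=f32-f127
   marks f32 through f127 as fixed and call-used so the compiler never
   allocates them; several such ranges may be given, separated by
   commas.  */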
5986 /* Implement TARGET_OPTION_OVERRIDE. */
5988 static void
5989 ia64_option_override (void)
5991 unsigned int i;
5992 cl_deferred_option *opt;
5993 vec<cl_deferred_option> *v
5994 = (vec<cl_deferred_option> *) ia64_deferred_options;
5996 if (v)
5997 FOR_EACH_VEC_ELT (*v, i, opt)
5999 switch (opt->opt_index)
6001 case OPT_mfixed_range_:
6002 fix_range (opt->arg);
6003 break;
6005 default:
6006 gcc_unreachable ();
6010 if (TARGET_AUTO_PIC)
6011 target_flags |= MASK_CONST_GP;
6013 /* Numerous experiments show that IRA-based loop pressure
6014 calculation works better for RTL loop invariant motion on targets
6015 with enough (>= 32) registers. It is an expensive optimization,
6016 so it is enabled only when optimizing for peak performance. */
6017 if (optimize >= 3)
6018 flag_ira_loop_pressure = 1;
6021 ia64_section_threshold = (global_options_set.x_g_switch_value
6022 ? g_switch_value
6023 : IA64_DEFAULT_GVALUE);
6025 init_machine_status = ia64_init_machine_status;
6027 if (align_functions <= 0)
6028 align_functions = 64;
6029 if (align_loops <= 0)
6030 align_loops = 32;
6031 if (TARGET_ABI_OPEN_VMS)
6032 flag_no_common = 1;
6034 ia64_override_options_after_change();
6037 /* Implement targetm.override_options_after_change. */
6039 static void
6040 ia64_override_options_after_change (void)
6042 if (optimize >= 3
6043 && !global_options_set.x_flag_selective_scheduling
6044 && !global_options_set.x_flag_selective_scheduling2)
6046 flag_selective_scheduling2 = 1;
6047 flag_sel_sched_pipelining = 1;
6049 if (mflag_sched_control_spec == 2)
6051 /* Control speculation is on by default for the selective scheduler,
6052 but not for the Haifa scheduler. */
6053 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6055 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6057 /* FIXME: remove this once we implement breaking autoinsns as
6058 a transformation. */
6059 flag_auto_inc_dec = 0;
6063 /* Initialize the record of emitted frame related registers. */
6065 void ia64_init_expanders (void)
6067 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6070 static struct machine_function *
6071 ia64_init_machine_status (void)
6073 return ggc_cleared_alloc<machine_function> ();
6076 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6077 static enum attr_type ia64_safe_type (rtx_insn *);
6079 static enum attr_itanium_class
6080 ia64_safe_itanium_class (rtx_insn *insn)
6082 if (recog_memoized (insn) >= 0)
6083 return get_attr_itanium_class (insn);
6084 else if (DEBUG_INSN_P (insn))
6085 return ITANIUM_CLASS_IGNORE;
6086 else
6087 return ITANIUM_CLASS_UNKNOWN;
6090 static enum attr_type
6091 ia64_safe_type (rtx_insn *insn)
6093 if (recog_memoized (insn) >= 0)
6094 return get_attr_type (insn);
6095 else
6096 return TYPE_UNKNOWN;
6099 /* The following collection of routines emit instruction group stop bits as
6100 necessary to avoid dependencies. */
6102 /* Need to track some additional registers as far as serialization is
6103 concerned so we can properly handle br.call and br.ret. We could
6104 make these registers visible to gcc, but since these registers are
6105 never explicitly used in gcc generated code, it seems wasteful to
6106 do so (plus it would make the call and return patterns needlessly
6107 complex). */
6108 #define REG_RP (BR_REG (0))
6109 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6110 /* This is used for volatile asms which may require a stop bit immediately
6111 before and after them. */
6112 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6113 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6114 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6116 /* For each register, we keep track of how it has been written in the
6117 current instruction group.
6119 If a register is written unconditionally (no qualifying predicate),
6120 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6122 If a register is written if its qualifying predicate P is true, we
6123 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6124 may be written again by the complement of P (P^1) and when this happens,
6125 WRITE_COUNT gets set to 2.
6127 The result of this is that whenever an insn attempts to write a register
6128 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6130 If a predicate register is written by a floating-point insn, we set
6131 WRITTEN_BY_FP to true.
6133 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6134 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
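/* A small illustration of the scheme above (register and predicate
   numbers made up): an unconditional "mov r14 = r15" sets r14's
   WRITE_COUNT straight to 2, so any later write of r14 in the same
   group needs a stop bit first; a predicated "(p6) mov r14 = r15" only
   sets WRITE_COUNT to 1 and records p6 in FIRST_PRED, and a subsequent
   write under the complementary predicate raises it to 2.  */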
6136 #if GCC_VERSION >= 4000
6137 #define RWS_FIELD_TYPE __extension__ unsigned short
6138 #else
6139 #define RWS_FIELD_TYPE unsigned int
6140 #endif
6141 struct reg_write_state
6143 RWS_FIELD_TYPE write_count : 2;
6144 RWS_FIELD_TYPE first_pred : 10;
6145 RWS_FIELD_TYPE written_by_fp : 1;
6146 RWS_FIELD_TYPE written_by_and : 1;
6147 RWS_FIELD_TYPE written_by_or : 1;
6150 /* Cumulative info for the current instruction group. */
6151 struct reg_write_state rws_sum[NUM_REGS];
6152 #ifdef ENABLE_CHECKING
6153 /* Bitmap whether a register has been written in the current insn. */
6154 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6155 / HOST_BITS_PER_WIDEST_FAST_INT];
6157 static inline void
6158 rws_insn_set (int regno)
6160 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6161 SET_HARD_REG_BIT (rws_insn, regno);
6164 static inline int
6165 rws_insn_test (int regno)
6167 return TEST_HARD_REG_BIT (rws_insn, regno);
6169 #else
6170 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6171 unsigned char rws_insn[2];
6173 static inline void
6174 rws_insn_set (int regno)
6176 if (regno == REG_AR_CFM)
6177 rws_insn[0] = 1;
6178 else if (regno == REG_VOLATILE)
6179 rws_insn[1] = 1;
6182 static inline int
6183 rws_insn_test (int regno)
6185 if (regno == REG_AR_CFM)
6186 return rws_insn[0];
6187 if (regno == REG_VOLATILE)
6188 return rws_insn[1];
6189 return 0;
6191 #endif
6193 /* Indicates whether this is the first instruction after a stop bit,
6194 in which case we don't need another stop bit. Without this,
6195 ia64_variable_issue will die when scheduling an alloc. */
6196 static int first_instruction;
6198 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6199 RTL for one instruction. */
6200 struct reg_flags
6202 unsigned int is_write : 1; /* Is register being written? */
6203 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6204 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6205 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6206 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6207 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6210 static void rws_update (int, struct reg_flags, int);
6211 static int rws_access_regno (int, struct reg_flags, int);
6212 static int rws_access_reg (rtx, struct reg_flags, int);
6213 static void update_set_flags (rtx, struct reg_flags *);
6214 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6215 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6216 static void init_insn_group_barriers (void);
6217 static int group_barrier_needed (rtx_insn *);
6218 static int safe_group_barrier_needed (rtx_insn *);
6219 static int in_safe_group_barrier;
6221 /* Update *RWS for REGNO, which is being written by the current instruction,
6222 with predicate PRED, and associated register flags in FLAGS. */
6224 static void
6225 rws_update (int regno, struct reg_flags flags, int pred)
6227 if (pred)
6228 rws_sum[regno].write_count++;
6229 else
6230 rws_sum[regno].write_count = 2;
6231 rws_sum[regno].written_by_fp |= flags.is_fp;
6232 /* ??? Not tracking and/or across differing predicates. */
6233 rws_sum[regno].written_by_and = flags.is_and;
6234 rws_sum[regno].written_by_or = flags.is_or;
6235 rws_sum[regno].first_pred = pred;
6238 /* Handle an access to register REGNO of type FLAGS using predicate register
6239 PRED. Update rws_sum array. Return 1 if this access creates
6240 a dependency with an earlier instruction in the same group. */
6242 static int
6243 rws_access_regno (int regno, struct reg_flags flags, int pred)
6245 int need_barrier = 0;
6247 gcc_assert (regno < NUM_REGS);
6249 if (! PR_REGNO_P (regno))
6250 flags.is_and = flags.is_or = 0;
6252 if (flags.is_write)
6254 int write_count;
6256 rws_insn_set (regno);
6257 write_count = rws_sum[regno].write_count;
6259 switch (write_count)
6261 case 0:
6262 /* The register has not been written yet. */
6263 if (!in_safe_group_barrier)
6264 rws_update (regno, flags, pred);
6265 break;
6267 case 1:
6268 /* The register has been written via a predicate. Treat
6269 it like an unconditional write and do not try to check
6270 for a complementary pred reg in the earlier write. */
6271 if (flags.is_and && rws_sum[regno].written_by_and)
6272 ;
6273 else if (flags.is_or && rws_sum[regno].written_by_or)
6274 ;
6275 else
6276 need_barrier = 1;
6277 if (!in_safe_group_barrier)
6278 rws_update (regno, flags, pred);
6279 break;
6281 case 2:
6282 /* The register has been unconditionally written already. We
6283 need a barrier. */
6284 if (flags.is_and && rws_sum[regno].written_by_and)
6285 ;
6286 else if (flags.is_or && rws_sum[regno].written_by_or)
6287 ;
6288 else
6289 need_barrier = 1;
6290 if (!in_safe_group_barrier)
6292 rws_sum[regno].written_by_and = flags.is_and;
6293 rws_sum[regno].written_by_or = flags.is_or;
6295 break;
6297 default:
6298 gcc_unreachable ();
6301 else
6303 if (flags.is_branch)
6305 /* Branches have several RAW exceptions that allow us to avoid
6306 barriers. */
6308 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6309 /* RAW dependencies on branch regs are permissible as long
6310 as the writer is a non-branch instruction. Since we
6311 never generate code that uses a branch register written
6312 by a branch instruction, handling this case is
6313 easy. */
6314 return 0;
6316 if (REGNO_REG_CLASS (regno) == PR_REGS
6317 && ! rws_sum[regno].written_by_fp)
6318 /* The predicates of a branch are available within the
6319 same insn group as long as the predicate was written by
6320 something other than a floating-point instruction. */
6321 return 0;
6324 if (flags.is_and && rws_sum[regno].written_by_and)
6325 return 0;
6326 if (flags.is_or && rws_sum[regno].written_by_or)
6327 return 0;
6329 switch (rws_sum[regno].write_count)
6331 case 0:
6332 /* The register has not been written yet. */
6333 break;
6335 case 1:
6336 /* The register has been written via a predicate, assume we
6337 need a barrier (don't check for complementary regs). */
6338 need_barrier = 1;
6339 break;
6341 case 2:
6342 /* The register has been unconditionally written already. We
6343 need a barrier. */
6344 need_barrier = 1;
6345 break;
6347 default:
6348 gcc_unreachable ();
6352 return need_barrier;
6355 static int
6356 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6358 int regno = REGNO (reg);
6359 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6361 if (n == 1)
6362 return rws_access_regno (regno, flags, pred);
6363 else
6365 int need_barrier = 0;
6366 while (--n >= 0)
6367 need_barrier |= rws_access_regno (regno + n, flags, pred);
6368 return need_barrier;
6372 /* Examine X, which is a SET rtx, and update the register flags
6373 stored in *PFLAGS. */
6375 static void
6376 update_set_flags (rtx x, struct reg_flags *pflags)
6378 rtx src = SET_SRC (x);
6380 switch (GET_CODE (src))
6382 case CALL:
6383 return;
6385 case IF_THEN_ELSE:
6386 /* There are four cases here:
6387 (1) The destination is (pc), in which case this is a branch,
6388 nothing here applies.
6389 (2) The destination is ar.lc, in which case this is a
6390 doloop_end_internal,
6391 (3) The destination is an fp register, in which case this is
6392 an fselect instruction.
6393 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6394 this is a check load.
6395 In all cases, nothing we do in this function applies. */
6396 return;
6398 default:
6399 if (COMPARISON_P (src)
6400 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6401 /* Set pflags->is_fp to 1 so that we know we're dealing
6402 with a floating point comparison when processing the
6403 destination of the SET. */
6404 pflags->is_fp = 1;
6406 /* Discover if this is a parallel comparison. We only handle
6407 and.orcm and or.andcm at present, since we must retain a
6408 strict inverse on the predicate pair. */
6409 else if (GET_CODE (src) == AND)
6410 pflags->is_and = 1;
6411 else if (GET_CODE (src) == IOR)
6412 pflags->is_or = 1;
6414 break;
6418 /* Subroutine of rtx_needs_barrier; this function determines whether the
6419 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6420 are as in rtx_needs_barrier. */
6423 static int
6424 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6426 int need_barrier = 0;
6427 rtx dst;
6428 rtx src = SET_SRC (x);
6430 if (GET_CODE (src) == CALL)
6431 /* We don't need to worry about the result registers that
6432 get written by subroutine call. */
6433 return rtx_needs_barrier (src, flags, pred);
6434 else if (SET_DEST (x) == pc_rtx)
6436 /* X is a conditional branch. */
6437 /* ??? This seems redundant, as the caller sets this bit for
6438 all JUMP_INSNs. */
6439 if (!ia64_spec_check_src_p (src))
6440 flags.is_branch = 1;
6441 return rtx_needs_barrier (src, flags, pred);
6444 if (ia64_spec_check_src_p (src))
6445 /* Avoid checking one register twice (in condition
6446 and in 'then' section) for ldc pattern. */
6448 gcc_assert (REG_P (XEXP (src, 2)));
6449 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6451 /* We process MEM below. */
6452 src = XEXP (src, 1);
6455 need_barrier |= rtx_needs_barrier (src, flags, pred);
6457 dst = SET_DEST (x);
6458 if (GET_CODE (dst) == ZERO_EXTRACT)
6460 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6461 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6463 return need_barrier;
6466 /* Handle an access to rtx X of type FLAGS using predicate register
6467 PRED. Return 1 if this access creates a dependency with an earlier
6468 instruction in the same group. */
6470 static int
6471 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6473 int i, j;
6474 int is_complemented = 0;
6475 int need_barrier = 0;
6476 const char *format_ptr;
6477 struct reg_flags new_flags;
6478 rtx cond;
6480 if (! x)
6481 return 0;
6483 new_flags = flags;
6485 switch (GET_CODE (x))
6487 case SET:
6488 update_set_flags (x, &new_flags);
6489 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6490 if (GET_CODE (SET_SRC (x)) != CALL)
6492 new_flags.is_write = 1;
6493 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6495 break;
6497 case CALL:
6498 new_flags.is_write = 0;
6499 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6501 /* Avoid multiple register writes, in case this is a pattern with
6502 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6503 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6505 new_flags.is_write = 1;
6506 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6507 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6508 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6510 break;
6512 case COND_EXEC:
6513 /* X is a predicated instruction. */
6515 cond = COND_EXEC_TEST (x);
6516 gcc_assert (!pred);
6517 need_barrier = rtx_needs_barrier (cond, flags, 0);
6519 if (GET_CODE (cond) == EQ)
6520 is_complemented = 1;
6521 cond = XEXP (cond, 0);
6522 gcc_assert (GET_CODE (cond) == REG
6523 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6524 pred = REGNO (cond);
6525 if (is_complemented)
6526 ++pred;
6528 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6529 return need_barrier;
6531 case CLOBBER:
6532 case USE:
6533 /* Clobber & use are for earlier compiler-phases only. */
6534 break;
6536 case ASM_OPERANDS:
6537 case ASM_INPUT:
6538 /* We always emit stop bits for traditional asms. We emit stop bits
6539 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6540 if (GET_CODE (x) != ASM_OPERANDS
6541 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6543 /* Avoid writing the register multiple times if we have multiple
6544 asm outputs. This avoids a failure in rws_access_reg. */
6545 if (! rws_insn_test (REG_VOLATILE))
6547 new_flags.is_write = 1;
6548 rws_access_regno (REG_VOLATILE, new_flags, pred);
6550 return 1;
6553 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6554 We cannot just fall through here since then we would be confused
6555 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6556 a traditional asm, unlike its normal usage. */
6558 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6559 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6560 need_barrier = 1;
6561 break;
6563 case PARALLEL:
6564 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6566 rtx pat = XVECEXP (x, 0, i);
6567 switch (GET_CODE (pat))
6569 case SET:
6570 update_set_flags (pat, &new_flags);
6571 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6572 break;
6574 case USE:
6575 case CALL:
6576 case ASM_OPERANDS:
6577 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6578 break;
6580 case CLOBBER:
6581 if (REG_P (XEXP (pat, 0))
6582 && extract_asm_operands (x) != NULL_RTX
6583 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6585 new_flags.is_write = 1;
6586 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6587 new_flags, pred);
6588 new_flags = flags;
6590 break;
6592 case RETURN:
6593 break;
6595 default:
6596 gcc_unreachable ();
6599 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6601 rtx pat = XVECEXP (x, 0, i);
6602 if (GET_CODE (pat) == SET)
6604 if (GET_CODE (SET_SRC (pat)) != CALL)
6606 new_flags.is_write = 1;
6607 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6608 pred);
6611 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6612 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6614 break;
6616 case SUBREG:
6617 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6618 break;
6619 case REG:
6620 if (REGNO (x) == AR_UNAT_REGNUM)
6622 for (i = 0; i < 64; ++i)
6623 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6625 else
6626 need_barrier = rws_access_reg (x, flags, pred);
6627 break;
6629 case MEM:
6630 /* Find the regs used in memory address computation. */
6631 new_flags.is_write = 0;
6632 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6633 break;
6635 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6636 case SYMBOL_REF: case LABEL_REF: case CONST:
6637 break;
6639 /* Operators with side-effects. */
6640 case POST_INC: case POST_DEC:
6641 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6643 new_flags.is_write = 0;
6644 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6645 new_flags.is_write = 1;
6646 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6647 break;
6649 case POST_MODIFY:
6650 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6652 new_flags.is_write = 0;
6653 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6654 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6655 new_flags.is_write = 1;
6656 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6657 break;
6659 /* Handle common unary and binary ops for efficiency. */
6660 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6661 case MOD: case UDIV: case UMOD: case AND: case IOR:
6662 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6663 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6664 case NE: case EQ: case GE: case GT: case LE:
6665 case LT: case GEU: case GTU: case LEU: case LTU:
6666 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6667 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6668 break;
6670 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6671 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6672 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6673 case SQRT: case FFS: case POPCOUNT:
6674 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6675 break;
6677 case VEC_SELECT:
6678 /* VEC_SELECT's second argument is a PARALLEL with integers that
6679 describe the elements selected. On ia64, those integers are
6680 always constants. Avoid walking the PARALLEL so that we don't
6681 get confused with "normal" parallels and then die. */
6682 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6683 break;
6685 case UNSPEC:
6686 switch (XINT (x, 1))
6688 case UNSPEC_LTOFF_DTPMOD:
6689 case UNSPEC_LTOFF_DTPREL:
6690 case UNSPEC_DTPREL:
6691 case UNSPEC_LTOFF_TPREL:
6692 case UNSPEC_TPREL:
6693 case UNSPEC_PRED_REL_MUTEX:
6694 case UNSPEC_PIC_CALL:
6695 case UNSPEC_MF:
6696 case UNSPEC_FETCHADD_ACQ:
6697 case UNSPEC_FETCHADD_REL:
6698 case UNSPEC_BSP_VALUE:
6699 case UNSPEC_FLUSHRS:
6700 case UNSPEC_BUNDLE_SELECTOR:
6701 break;
6703 case UNSPEC_GR_SPILL:
6704 case UNSPEC_GR_RESTORE:
6706 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6707 HOST_WIDE_INT bit = (offset >> 3) & 63;
6709 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6710 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6711 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6712 new_flags, pred);
6713 break;
6716 case UNSPEC_FR_SPILL:
6717 case UNSPEC_FR_RESTORE:
6718 case UNSPEC_GETF_EXP:
6719 case UNSPEC_SETF_EXP:
6720 case UNSPEC_ADDP4:
6721 case UNSPEC_FR_SQRT_RECIP_APPROX:
6722 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6723 case UNSPEC_LDA:
6724 case UNSPEC_LDS:
6725 case UNSPEC_LDS_A:
6726 case UNSPEC_LDSA:
6727 case UNSPEC_CHKACLR:
6728 case UNSPEC_CHKS:
6729 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6730 break;
6732 case UNSPEC_FR_RECIP_APPROX:
6733 case UNSPEC_SHRP:
6734 case UNSPEC_COPYSIGN:
6735 case UNSPEC_FR_RECIP_APPROX_RES:
6736 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6737 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6738 break;
6740 case UNSPEC_CMPXCHG_ACQ:
6741 case UNSPEC_CMPXCHG_REL:
6742 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6743 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6744 break;
6746 default:
6747 gcc_unreachable ();
6749 break;
6751 case UNSPEC_VOLATILE:
6752 switch (XINT (x, 1))
6754 case UNSPECV_ALLOC:
6755 /* Alloc must always be the first instruction of a group.
6756 We force this by always returning true. */
6757 /* ??? We might get better scheduling if we explicitly check for
6758 input/local/output register dependencies, and modify the
6759 scheduler so that alloc is always reordered to the start of
6760 the current group. We could then eliminate all of the
6761 first_instruction code. */
6762 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6764 new_flags.is_write = 1;
6765 rws_access_regno (REG_AR_CFM, new_flags, pred);
6766 return 1;
6768 case UNSPECV_SET_BSP:
6769 case UNSPECV_PROBE_STACK_RANGE:
6770 need_barrier = 1;
6771 break;
6773 case UNSPECV_BLOCKAGE:
6774 case UNSPECV_INSN_GROUP_BARRIER:
6775 case UNSPECV_BREAK:
6776 case UNSPECV_PSAC_ALL:
6777 case UNSPECV_PSAC_NORMAL:
6778 return 0;
6780 case UNSPECV_PROBE_STACK_ADDRESS:
6781 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6782 break;
6784 default:
6785 gcc_unreachable ();
6787 break;
6789 case RETURN:
6790 new_flags.is_write = 0;
6791 need_barrier = rws_access_regno (REG_RP, flags, pred);
6792 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6794 new_flags.is_write = 1;
6795 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6796 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6797 break;
6799 default:
6800 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6801 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6802 switch (format_ptr[i])
6804 case '0': /* unused field */
6805 case 'i': /* integer */
6806 case 'n': /* note */
6807 case 'w': /* wide integer */
6808 case 's': /* pointer to string */
6809 case 'S': /* optional pointer to string */
6810 break;
6812 case 'e':
6813 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6814 need_barrier = 1;
6815 break;
6817 case 'E':
6818 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6819 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6820 need_barrier = 1;
6821 break;
6823 default:
6824 gcc_unreachable ();
6826 break;
6828 return need_barrier;
6831 /* Clear out the state for group_barrier_needed at the start of a
6832 sequence of insns. */
6834 static void
6835 init_insn_group_barriers (void)
6837 memset (rws_sum, 0, sizeof (rws_sum));
6838 first_instruction = 1;
6841 /* Given the current state, determine whether a group barrier (a stop bit) is
6842 necessary before INSN. Return nonzero if so. This modifies the state to
6843 include the effects of INSN as a side-effect. */
6845 static int
6846 group_barrier_needed (rtx_insn *insn)
6848 rtx pat;
6849 int need_barrier = 0;
6850 struct reg_flags flags;
6852 memset (&flags, 0, sizeof (flags));
6853 switch (GET_CODE (insn))
6855 case NOTE:
6856 case DEBUG_INSN:
6857 break;
6859 case BARRIER:
6860 /* A barrier doesn't imply an instruction group boundary. */
6861 break;
6863 case CODE_LABEL:
6864 memset (rws_insn, 0, sizeof (rws_insn));
6865 return 1;
6867 case CALL_INSN:
6868 flags.is_branch = 1;
6869 flags.is_sibcall = SIBLING_CALL_P (insn);
6870 memset (rws_insn, 0, sizeof (rws_insn));
6872 /* Don't bundle a call following another call. */
6873 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6875 need_barrier = 1;
6876 break;
6879 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6880 break;
6882 case JUMP_INSN:
6883 if (!ia64_spec_check_p (insn))
6884 flags.is_branch = 1;
6886 /* Don't bundle a jump following a call. */
6887 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6889 need_barrier = 1;
6890 break;
6892 /* FALLTHRU */
6894 case INSN:
6895 if (GET_CODE (PATTERN (insn)) == USE
6896 || GET_CODE (PATTERN (insn)) == CLOBBER)
6897 /* Don't care about USE and CLOBBER "insns"---those are used to
6898 indicate to the optimizer that it shouldn't get rid of
6899 certain operations. */
6900 break;
6902 pat = PATTERN (insn);
6904 /* Ug. Hack hacks hacked elsewhere. */
6905 switch (recog_memoized (insn))
6907 /* We play dependency tricks with the epilogue in order
6908 to get proper schedules. Undo this for dv analysis. */
6909 case CODE_FOR_epilogue_deallocate_stack:
6910 case CODE_FOR_prologue_allocate_stack:
6911 pat = XVECEXP (pat, 0, 0);
6912 break;
6914 /* The pattern we use for br.cloop confuses the code above.
6915 The second element of the vector is representative. */
6916 case CODE_FOR_doloop_end_internal:
6917 pat = XVECEXP (pat, 0, 1);
6918 break;
6920 /* Doesn't generate code. */
6921 case CODE_FOR_pred_rel_mutex:
6922 case CODE_FOR_prologue_use:
6923 return 0;
6925 default:
6926 break;
6929 memset (rws_insn, 0, sizeof (rws_insn));
6930 need_barrier = rtx_needs_barrier (pat, flags, 0);
6932 /* Check to see if the previous instruction was a volatile
6933 asm. */
6934 if (! need_barrier)
6935 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6937 break;
6939 default:
6940 gcc_unreachable ();
6943 if (first_instruction && important_for_bundling_p (insn))
6945 need_barrier = 0;
6946 first_instruction = 0;
6949 return need_barrier;
6952 /* Like group_barrier_needed, but do not clobber the current state. */
6954 static int
6955 safe_group_barrier_needed (rtx_insn *insn)
6957 int saved_first_instruction;
6958 int t;
6960 saved_first_instruction = first_instruction;
6961 in_safe_group_barrier = 1;
6963 t = group_barrier_needed (insn);
6965 first_instruction = saved_first_instruction;
6966 in_safe_group_barrier = 0;
6968 return t;
6971 /* Scan the current function and insert stop bits as necessary to
6972 eliminate dependencies. This function assumes that a final
6973 instruction scheduling pass has been run which has already
6974 inserted most of the necessary stop bits. This function only
6975 inserts new ones at basic block boundaries, since these are
6976 invisible to the scheduler. */
6978 static void
6979 emit_insn_group_barriers (FILE *dump)
6981 rtx_insn *insn;
6982 rtx_insn *last_label = 0;
6983 int insns_since_last_label = 0;
6985 init_insn_group_barriers ();
6987 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6989 if (LABEL_P (insn))
6991 if (insns_since_last_label)
6992 last_label = insn;
6993 insns_since_last_label = 0;
6995 else if (NOTE_P (insn)
6996 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6998 if (insns_since_last_label)
6999 last_label = insn;
7000 insns_since_last_label = 0;
7002 else if (NONJUMP_INSN_P (insn)
7003 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7004 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7006 init_insn_group_barriers ();
7007 last_label = 0;
7009 else if (NONDEBUG_INSN_P (insn))
7011 insns_since_last_label = 1;
7013 if (group_barrier_needed (insn))
7015 if (last_label)
7017 if (dump)
7018 fprintf (dump, "Emitting stop before label %d\n",
7019 INSN_UID (last_label));
7020 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7021 insn = last_label;
7023 init_insn_group_barriers ();
7024 last_label = 0;
7031 /* Like emit_insn_group_barriers, but used when no final scheduling pass has been run.
7032 This function has to emit all necessary group barriers. */
7034 static void
7035 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7037 rtx_insn *insn;
7039 init_insn_group_barriers ();
7041 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7043 if (BARRIER_P (insn))
7045 rtx_insn *last = prev_active_insn (insn);
7047 if (! last)
7048 continue;
7049 if (JUMP_TABLE_DATA_P (last))
7050 last = prev_active_insn (last);
7051 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7052 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7054 init_insn_group_barriers ();
7056 else if (NONDEBUG_INSN_P (insn))
7058 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7059 init_insn_group_barriers ();
7060 else if (group_barrier_needed (insn))
7062 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7063 init_insn_group_barriers ();
7064 group_barrier_needed (insn);
7072 /* Instruction scheduling support. */
7074 #define NR_BUNDLES 10
7076 /* A list of names of all available bundles. */
7078 static const char *bundle_name [NR_BUNDLES] =
7080 ".mii",
7081 ".mmi",
7082 ".mfi",
7083 ".mmf",
7084 #if NR_BUNDLES == 10
7085 ".bbb",
7086 ".mbb",
7087 #endif
7088 ".mib",
7089 ".mmb",
7090 ".mfb",
7091 ".mlx"
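/* The names above follow the IA-64 bundle templates: roughly, 'm' is a
   memory slot, 'i' integer, 'f' floating-point, 'b' branch, and "mlx"
   is the memory + long-immediate template used for insns such as
   movl.  */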
7094 /* Nonzero if we should insert stop bits into the schedule. */
7096 int ia64_final_schedule = 0;
7098 /* Codes of the corresponding queried units: */
7100 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7101 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7103 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7104 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7106 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7108 /* The following variable value is an insn group barrier. */
7110 static rtx_insn *dfa_stop_insn;
7112 /* The following variable value is the last issued insn. */
7114 static rtx_insn *last_scheduled_insn;
7116 /* The following variable value is a pointer to a DFA state used as
7117 a temporary variable. */
7119 static state_t temp_dfa_state = NULL;
7121 /* The following variable value is the DFA state after issuing the last
7122 insn. */
7124 static state_t prev_cycle_state = NULL;
7126 /* The following array element values are TRUE if the corresponding
7127 insn requires stop bits to be added before it. */
7129 static char *stops_p = NULL;
7131 /* The following variable is used to set up the above-mentioned array. */
7133 static int stop_before_p = 0;
7135 /* The following variable value is the length of the arrays `clocks' and
7136 `add_cycles'. */
7138 static int clocks_length;
7140 /* The following variable value is the number of data speculations in progress. */
7141 static int pending_data_specs = 0;
7143 /* Number of memory references on current and three future processor cycles. */
7144 static char mem_ops_in_group[4];
7146 /* Number of current processor cycle (from scheduler's point of view). */
7147 static int current_cycle;
7149 static rtx ia64_single_set (rtx_insn *);
7150 static void ia64_emit_insn_before (rtx, rtx);
7152 /* Map a bundle number to its pseudo-op. */
7154 const char *
7155 get_bundle_name (int b)
7157 return bundle_name[b];
7161 /* Return the maximum number of instructions a cpu can issue. */
7163 static int
7164 ia64_issue_rate (void)
7166 return 6;
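/* Six corresponds to two three-instruction bundles per clock, the
   nominal issue width of the Itanium processors this port schedules
   for.  */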
7169 /* Helper function - like single_set, but look inside COND_EXEC. */
7171 static rtx
7172 ia64_single_set (rtx_insn *insn)
7174 rtx x = PATTERN (insn), ret;
7175 if (GET_CODE (x) == COND_EXEC)
7176 x = COND_EXEC_CODE (x);
7177 if (GET_CODE (x) == SET)
7178 return x;
7180 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7181 Although they are not classical single set, the second set is there just
7182 to protect it from moving past FP-relative stack accesses. */
7183 switch (recog_memoized (insn))
7185 case CODE_FOR_prologue_allocate_stack:
7186 case CODE_FOR_prologue_allocate_stack_pr:
7187 case CODE_FOR_epilogue_deallocate_stack:
7188 case CODE_FOR_epilogue_deallocate_stack_pr:
7189 ret = XVECEXP (x, 0, 0);
7190 break;
7192 default:
7193 ret = single_set_2 (insn, x);
7194 break;
7197 return ret;
7200 /* Adjust the cost of a scheduling dependency.
7201 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7202 COST is the current cost, DW is dependency weakness. */
7203 static int
7204 ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7205 int cost, dw_t dw)
7207 enum reg_note dep_type = (enum reg_note) dep_type1;
7208 enum attr_itanium_class dep_class;
7209 enum attr_itanium_class insn_class;
7211 insn_class = ia64_safe_itanium_class (insn);
7212 dep_class = ia64_safe_itanium_class (dep_insn);
7214 /* Treat true memory dependencies separately. Ignore apparent true
7215 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7216 if (dep_type == REG_DEP_TRUE
7217 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7218 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7219 return 0;
7221 if (dw == MIN_DEP_WEAK)
7222 /* Store and load are likely to alias, use higher cost to avoid stall. */
7223 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7224 else if (dw > MIN_DEP_WEAK)
7226 /* Store and load are less likely to alias. */
7227 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7228 /* Assume there will be no cache conflict for floating-point data.
7229 For integer data, L1 conflict penalty is huge (17 cycles), so we
7230 never assume it will not cause a conflict. */
7231 return 0;
7232 else
7233 return cost;
7236 if (dep_type != REG_DEP_OUTPUT)
7237 return cost;
7239 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7240 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7241 return 0;
7243 return cost;
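/* Example of the adjustment above: a load truly dependent on a store
   that is likely to alias it (dependence weakness of MIN_DEP_WEAK) gets
   the --param sched-mem-true-dep-cost value as its cost, while an
   output dependence involving a store is made free.  */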
7246 /* Like emit_insn_before, but skip cycle_display notes.
7247 ??? When cycle display notes are implemented, update this. */
7249 static void
7250 ia64_emit_insn_before (rtx insn, rtx before)
7252 emit_insn_before (insn, before);
7255 /* The following function marks insns that produce addresses for load
7256 and store insns. Such insns will be placed into M slots because that
7257 decreases latency for Itanium1 (see function
7258 `ia64_produce_address_p' and the DFA descriptions). */
7260 static void
7261 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7263 rtx_insn *insn, *next, *next_tail;
7265 /* Before reload, which_alternative is not set, which means that
7266 ia64_safe_itanium_class will produce wrong results for (at least)
7267 move instructions. */
7268 if (!reload_completed)
7269 return;
7271 next_tail = NEXT_INSN (tail);
7272 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7273 if (INSN_P (insn))
7274 insn->call = 0;
7275 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7276 if (INSN_P (insn)
7277 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7279 sd_iterator_def sd_it;
7280 dep_t dep;
7281 bool has_mem_op_consumer_p = false;
7283 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7285 enum attr_itanium_class c;
7287 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7288 continue;
7290 next = DEP_CON (dep);
7291 c = ia64_safe_itanium_class (next);
7292 if ((c == ITANIUM_CLASS_ST
7293 || c == ITANIUM_CLASS_STF)
7294 && ia64_st_address_bypass_p (insn, next))
7296 has_mem_op_consumer_p = true;
7297 break;
7299 else if ((c == ITANIUM_CLASS_LD
7300 || c == ITANIUM_CLASS_FLD
7301 || c == ITANIUM_CLASS_FLDP)
7302 && ia64_ld_address_bypass_p (insn, next))
7304 has_mem_op_consumer_p = true;
7305 break;
7309 insn->call = has_mem_op_consumer_p;
7313 /* We're beginning a new block. Initialize data structures as necessary. */
7315 static void
7316 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7317 int sched_verbose ATTRIBUTE_UNUSED,
7318 int max_ready ATTRIBUTE_UNUSED)
7320 #ifdef ENABLE_CHECKING
7321 rtx_insn *insn;
7323 if (!sel_sched_p () && reload_completed)
7324 for (insn = NEXT_INSN (current_sched_info->prev_head);
7325 insn != current_sched_info->next_tail;
7326 insn = NEXT_INSN (insn))
7327 gcc_assert (!SCHED_GROUP_P (insn));
7328 #endif
7329 last_scheduled_insn = NULL;
7330 init_insn_group_barriers ();
7332 current_cycle = 0;
7333 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7336 /* We're beginning a scheduling pass. Check assertion. */
7338 static void
7339 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7340 int sched_verbose ATTRIBUTE_UNUSED,
7341 int max_ready ATTRIBUTE_UNUSED)
7343 gcc_assert (pending_data_specs == 0);
7346 /* Scheduling pass is now finished. Free/reset static variable. */
7347 static void
7348 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7349 int sched_verbose ATTRIBUTE_UNUSED)
7351 gcc_assert (pending_data_specs == 0);
7354 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7355 speculation check), FALSE otherwise. */
7356 static bool
7357 is_load_p (rtx_insn *insn)
7359 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7361 return
7362 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7363 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7366 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global array
7367 (taking into account the 3-cycle cache reference postponement for stores: Intel
7368 Itanium 2 Reference Manual for Software Development and Optimization,
7369 6.7.3.1). */
7370 static void
7371 record_memory_reference (rtx_insn *insn)
7373 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7375 switch (insn_class) {
7376 case ITANIUM_CLASS_FLD:
7377 case ITANIUM_CLASS_LD:
7378 mem_ops_in_group[current_cycle % 4]++;
7379 break;
7380 case ITANIUM_CLASS_STF:
7381 case ITANIUM_CLASS_ST:
7382 mem_ops_in_group[(current_cycle + 3) % 4]++;
7383 break;
7384 default:;
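/* Note on the accounting above: a load issued on cycle N is counted in
   slot N % 4, while a store is counted in slot (N + 3) % 4 to model the
   delayed cache reference described in the comment before this
   function.  */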
7388 /* We are about to begin issuing insns for this clock cycle.
7389 Override the default sort algorithm to better slot instructions. */
7391 static int
7392 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7393 int *pn_ready, int clock_var,
7394 int reorder_type)
7396 int n_asms;
7397 int n_ready = *pn_ready;
7398 rtx_insn **e_ready = ready + n_ready;
7399 rtx_insn **insnp;
7401 if (sched_verbose)
7402 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7404 if (reorder_type == 0)
7406 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7407 n_asms = 0;
7408 for (insnp = ready; insnp < e_ready; insnp++)
7409 if (insnp < e_ready)
7411 rtx_insn *insn = *insnp;
7412 enum attr_type t = ia64_safe_type (insn);
7413 if (t == TYPE_UNKNOWN)
7415 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7416 || asm_noperands (PATTERN (insn)) >= 0)
7418 rtx_insn *lowest = ready[n_asms];
7419 ready[n_asms] = insn;
7420 *insnp = lowest;
7421 n_asms++;
7423 else
7425 rtx_insn *highest = ready[n_ready - 1];
7426 ready[n_ready - 1] = insn;
7427 *insnp = highest;
7428 return 1;
7433 if (n_asms < n_ready)
7435 /* Some normal insns to process. Skip the asms. */
7436 ready += n_asms;
7437 n_ready -= n_asms;
7439 else if (n_ready > 0)
7440 return 1;
7443 if (ia64_final_schedule)
7445 int deleted = 0;
7446 int nr_need_stop = 0;
7448 for (insnp = ready; insnp < e_ready; insnp++)
7449 if (safe_group_barrier_needed (*insnp))
7450 nr_need_stop++;
7452 if (reorder_type == 1 && n_ready == nr_need_stop)
7453 return 0;
7454 if (reorder_type == 0)
7455 return 1;
7456 insnp = e_ready;
7457 /* Move down everything that needs a stop bit, preserving
7458 relative order. */
7459 while (insnp-- > ready + deleted)
7460 while (insnp >= ready + deleted)
7462 rtx_insn *insn = *insnp;
7463 if (! safe_group_barrier_needed (insn))
7464 break;
7465 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7466 *ready = insn;
7467 deleted++;
7469 n_ready -= deleted;
7470 ready += deleted;
7473 current_cycle = clock_var;
7474 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7476 int moved = 0;
7478 insnp = e_ready;
7479 /* Move down loads/stores, preserving relative order. */
7480 while (insnp-- > ready + moved)
7481 while (insnp >= ready + moved)
7483 rtx_insn *insn = *insnp;
7484 if (! is_load_p (insn))
7485 break;
7486 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7487 *ready = insn;
7488 moved++;
7490 n_ready -= moved;
7491 ready += moved;
7494 return 1;
7497 /* We are about to begin issuing insns for this clock cycle. Override
7498 the default sort algorithm to better slot instructions. */
7500 static int
7501 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7502 int *pn_ready, int clock_var)
7504 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7505 pn_ready, clock_var, 0);
7508 /* Like ia64_sched_reorder, but called after issuing each insn.
7509 Override the default sort algorithm to better slot instructions. */
7511 static int
7512 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7513 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7514 int *pn_ready, int clock_var)
7516 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7517 clock_var, 1);
7520 /* We are about to issue INSN. Return the number of insns left on the
7521 ready queue that can be issued this cycle. */
7523 static int
7524 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7525 int sched_verbose ATTRIBUTE_UNUSED,
7526 rtx_insn *insn,
7527 int can_issue_more ATTRIBUTE_UNUSED)
7529 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7530 /* Modulo scheduling does not extend h_i_d when emitting
7531 new instructions. Don't use h_i_d, if we don't have to. */
7533 if (DONE_SPEC (insn) & BEGIN_DATA)
7534 pending_data_specs++;
7535 if (CHECK_SPEC (insn) & BEGIN_DATA)
7536 pending_data_specs--;
7539 if (DEBUG_INSN_P (insn))
7540 return 1;
7542 last_scheduled_insn = insn;
7543 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7544 if (reload_completed)
7546 int needed = group_barrier_needed (insn);
7548 gcc_assert (!needed);
7549 if (CALL_P (insn))
7550 init_insn_group_barriers ();
7551 stops_p [INSN_UID (insn)] = stop_before_p;
7552 stop_before_p = 0;
7554 record_memory_reference (insn);
7556 return 1;
7559 /* We are choosing an insn from the ready queue. Return zero if INSN
7560 can be chosen. */
7562 static int
7563 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7565 gcc_assert (insn && INSN_P (insn));
7567 /* The size of the ALAT is 32 entries. Since we perform conservative
7568 data speculation, we keep the ALAT half-empty. */
7569 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7570 return ready_index == 0 ? -1 : 1;
7572 if (ready_index == 0)
7573 return 0;
7575 if ((!reload_completed
7576 || !safe_group_barrier_needed (insn))
7577 && (!mflag_sched_mem_insns_hard_limit
7578 || !is_load_p (insn)
7579 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7580 return 0;
7582 return 1;
7585 /* The following variable value is a pseudo-insn used by the DFA insn
7586 scheduler to change the DFA state when the simulated clock is
7587 increased. */
7589 static rtx_insn *dfa_pre_cycle_insn;
7591 /* Returns 1 when a meaningful insn was scheduled between the last group
7592 barrier and LAST. */
7593 static int
7594 scheduled_good_insn (rtx_insn *last)
7596 if (last && recog_memoized (last) >= 0)
7597 return 1;
7599 for ( ;
7600 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7601 && !stops_p[INSN_UID (last)];
7602 last = PREV_INSN (last))
7603 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7604 the ebb we're scheduling. */
7605 if (INSN_P (last) && recog_memoized (last) >= 0)
7606 return 1;
7608 return 0;
7611 /* We are about to begin issuing INSN. Return nonzero if we cannot
7612 issue it on the given cycle CLOCK, and return zero if we should not sort
7613 the ready queue on the next clock start. */
7615 static int
7616 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7617 int clock, int *sort_p)
7619 gcc_assert (insn && INSN_P (insn));
7621 if (DEBUG_INSN_P (insn))
7622 return 0;
7624 /* When a group barrier is needed for insn, last_scheduled_insn
7625 should be set. */
7626 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7627 || last_scheduled_insn);
7629 if ((reload_completed
7630 && (safe_group_barrier_needed (insn)
7631 || (mflag_sched_stop_bits_after_every_cycle
7632 && last_clock != clock
7633 && last_scheduled_insn
7634 && scheduled_good_insn (last_scheduled_insn))))
7635 || (last_scheduled_insn
7636 && (CALL_P (last_scheduled_insn)
7637 || unknown_for_bundling_p (last_scheduled_insn))))
7639 init_insn_group_barriers ();
7641 if (verbose && dump)
7642 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7643 last_clock == clock ? " + cycle advance" : "");
7645 stop_before_p = 1;
7646 current_cycle = clock;
7647 mem_ops_in_group[current_cycle % 4] = 0;
7649 if (last_clock == clock)
7651 state_transition (curr_state, dfa_stop_insn);
7652 if (TARGET_EARLY_STOP_BITS)
7653 *sort_p = (last_scheduled_insn == NULL_RTX
7654 || ! CALL_P (last_scheduled_insn));
7655 else
7656 *sort_p = 0;
7657 return 1;
7660 if (last_scheduled_insn)
7662 if (unknown_for_bundling_p (last_scheduled_insn))
7663 state_reset (curr_state);
7664 else
7666 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7667 state_transition (curr_state, dfa_stop_insn);
7668 state_transition (curr_state, dfa_pre_cycle_insn);
7669 state_transition (curr_state, NULL);
7673 return 0;
7676 /* Implement targetm.sched.h_i_d_extended hook.
7677 Extend internal data structures. */
7678 static void
7679 ia64_h_i_d_extended (void)
7681 if (stops_p != NULL)
7683 int new_clocks_length = get_max_uid () * 3 / 2;
7684 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7685 clocks_length = new_clocks_length;
7690 /* This structure describes the data used by the backend to guide scheduling.
7691 When the current scheduling point is switched, this data should be saved
7692 and restored later, if the scheduler returns to this point. */
7693 struct _ia64_sched_context
7695 state_t prev_cycle_state;
7696 rtx_insn *last_scheduled_insn;
7697 struct reg_write_state rws_sum[NUM_REGS];
7698 struct reg_write_state rws_insn[NUM_REGS];
7699 int first_instruction;
7700 int pending_data_specs;
7701 int current_cycle;
7702 char mem_ops_in_group[4];
7704 typedef struct _ia64_sched_context *ia64_sched_context_t;
7706 /* Allocates a scheduling context. */
7707 static void *
7708 ia64_alloc_sched_context (void)
7710 return xmalloc (sizeof (struct _ia64_sched_context));
7713 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7714 the global context otherwise. */
7715 static void
7716 ia64_init_sched_context (void *_sc, bool clean_p)
7718 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7720 sc->prev_cycle_state = xmalloc (dfa_state_size);
7721 if (clean_p)
7723 state_reset (sc->prev_cycle_state);
7724 sc->last_scheduled_insn = NULL;
7725 memset (sc->rws_sum, 0, sizeof (rws_sum));
7726 memset (sc->rws_insn, 0, sizeof (rws_insn));
7727 sc->first_instruction = 1;
7728 sc->pending_data_specs = 0;
7729 sc->current_cycle = 0;
7730 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7732 else
7734 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7735 sc->last_scheduled_insn = last_scheduled_insn;
7736 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7737 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7738 sc->first_instruction = first_instruction;
7739 sc->pending_data_specs = pending_data_specs;
7740 sc->current_cycle = current_cycle;
7741 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7745 /* Sets the global scheduling context to the one pointed to by _SC. */
7746 static void
7747 ia64_set_sched_context (void *_sc)
7749 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7751 gcc_assert (sc != NULL);
7753 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7754 last_scheduled_insn = sc->last_scheduled_insn;
7755 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7756 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7757 first_instruction = sc->first_instruction;
7758 pending_data_specs = sc->pending_data_specs;
7759 current_cycle = sc->current_cycle;
7760 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7763 /* Clears the data in the _SC scheduling context. */
7764 static void
7765 ia64_clear_sched_context (void *_sc)
7767 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7769 free (sc->prev_cycle_state);
7770 sc->prev_cycle_state = NULL;
7773 /* Frees the _SC scheduling context. */
7774 static void
7775 ia64_free_sched_context (void *_sc)
7777 gcc_assert (_sc != NULL);
7779 free (_sc);
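/* A minimal usage sketch for the context hooks above (hypothetical caller
   for illustration only; the real callers drive these through the
   targetm.sched.*_sched_context hooks):

     void *ctx = ia64_alloc_sched_context ();
     ia64_init_sched_context (ctx, false);  - snapshot the global state
     ... keep scheduling; the globals change ...
     ia64_set_sched_context (ctx);          - roll the globals back
     ia64_clear_sched_context (ctx);        - frees ctx->prev_cycle_state
     ia64_free_sched_context (ctx);

   ia64_init_sched_context allocates sc->prev_cycle_state, so
   ia64_clear_sched_context must be called before the context is freed
   to avoid leaking that buffer.  */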
7782 typedef rtx (* gen_func_t) (rtx, rtx);
7784 /* Return a function that will generate a load of mode MODE_NO
7785 with speculation types TS. */
7786 static gen_func_t
7787 get_spec_load_gen_function (ds_t ts, int mode_no)
7789 static gen_func_t gen_ld_[] = {
7790 gen_movbi,
7791 gen_movqi_internal,
7792 gen_movhi_internal,
7793 gen_movsi_internal,
7794 gen_movdi_internal,
7795 gen_movsf_internal,
7796 gen_movdf_internal,
7797 gen_movxf_internal,
7798 gen_movti_internal,
7799 gen_zero_extendqidi2,
7800 gen_zero_extendhidi2,
7801 gen_zero_extendsidi2,
7804 static gen_func_t gen_ld_a[] = {
7805 gen_movbi_advanced,
7806 gen_movqi_advanced,
7807 gen_movhi_advanced,
7808 gen_movsi_advanced,
7809 gen_movdi_advanced,
7810 gen_movsf_advanced,
7811 gen_movdf_advanced,
7812 gen_movxf_advanced,
7813 gen_movti_advanced,
7814 gen_zero_extendqidi2_advanced,
7815 gen_zero_extendhidi2_advanced,
7816 gen_zero_extendsidi2_advanced,
7818 static gen_func_t gen_ld_s[] = {
7819 gen_movbi_speculative,
7820 gen_movqi_speculative,
7821 gen_movhi_speculative,
7822 gen_movsi_speculative,
7823 gen_movdi_speculative,
7824 gen_movsf_speculative,
7825 gen_movdf_speculative,
7826 gen_movxf_speculative,
7827 gen_movti_speculative,
7828 gen_zero_extendqidi2_speculative,
7829 gen_zero_extendhidi2_speculative,
7830 gen_zero_extendsidi2_speculative,
7832 static gen_func_t gen_ld_sa[] = {
7833 gen_movbi_speculative_advanced,
7834 gen_movqi_speculative_advanced,
7835 gen_movhi_speculative_advanced,
7836 gen_movsi_speculative_advanced,
7837 gen_movdi_speculative_advanced,
7838 gen_movsf_speculative_advanced,
7839 gen_movdf_speculative_advanced,
7840 gen_movxf_speculative_advanced,
7841 gen_movti_speculative_advanced,
7842 gen_zero_extendqidi2_speculative_advanced,
7843 gen_zero_extendhidi2_speculative_advanced,
7844 gen_zero_extendsidi2_speculative_advanced,
7846 static gen_func_t gen_ld_s_a[] = {
7847 gen_movbi_speculative_a,
7848 gen_movqi_speculative_a,
7849 gen_movhi_speculative_a,
7850 gen_movsi_speculative_a,
7851 gen_movdi_speculative_a,
7852 gen_movsf_speculative_a,
7853 gen_movdf_speculative_a,
7854 gen_movxf_speculative_a,
7855 gen_movti_speculative_a,
7856 gen_zero_extendqidi2_speculative_a,
7857 gen_zero_extendhidi2_speculative_a,
7858 gen_zero_extendsidi2_speculative_a,
7861 gen_func_t *gen_ld;
7863 if (ts & BEGIN_DATA)
7865 if (ts & BEGIN_CONTROL)
7866 gen_ld = gen_ld_sa;
7867 else
7868 gen_ld = gen_ld_a;
7870 else if (ts & BEGIN_CONTROL)
7872 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7873 || ia64_needs_block_p (ts))
7874 gen_ld = gen_ld_s;
7875 else
7876 gen_ld = gen_ld_s_a;
7878 else if (ts == 0)
7879 gen_ld = gen_ld_;
7880 else
7881 gcc_unreachable ();
7883 return gen_ld[mode_no];
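/* A worked example of the table selection above (derived from the code,
   for illustration): ts == BEGIN_DATA selects the *_advanced table
   gen_ld_a; ts == BEGIN_CONTROL selects gen_ld_s or gen_ld_s_a depending
   on how the later check will be done; ts == (BEGIN_DATA | BEGIN_CONTROL)
   selects gen_ld_sa; ts == 0 selects the plain move patterns.  So a
   DImode load that is to be both data- and control-speculated
   (mode_no == 4) ends up as gen_movdi_speculative_advanced.  */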
7886 /* Constants that help map 'machine_mode' to int. */
7887 enum SPEC_MODES
7889 SPEC_MODE_INVALID = -1,
7890 SPEC_MODE_FIRST = 0,
7891 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7892 SPEC_MODE_FOR_EXTEND_LAST = 3,
7893 SPEC_MODE_LAST = 8
7896 enum
7898 /* Offset to reach ZERO_EXTEND patterns. */
7899 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7902 /* Return index of the MODE. */
7903 static int
7904 ia64_mode_to_int (machine_mode mode)
7906 switch (mode)
7908 case BImode: return 0; /* SPEC_MODE_FIRST */
7909 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7910 case HImode: return 2;
7911 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7912 case DImode: return 4;
7913 case SFmode: return 5;
7914 case DFmode: return 6;
7915 case XFmode: return 7;
7916 case TImode:
7917 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7918 mentioned in itanium[12].md. Predicate fp_register_operand also
7919 needs to be defined. Bottom line: better disable for now. */
7920 return SPEC_MODE_INVALID;
7921 default: return SPEC_MODE_INVALID;
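/* Worked example of the index scheme (derived from the enums above):
   SPEC_GEN_EXTEND_OFFSET is 8 - 1 + 1 == 8, so a zero-extending QImode
   load maps to index 1 + 8 == 9, which selects the gen_zero_extendqidi2*
   entries in the generator tables above, while a plain DImode load stays
   at index 4 (the gen_movdi* entries).  Only QImode..SImode (indices
   1..3) may take the extension offset.  */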
7925 /* Provide information about speculation capabilities. */
7926 static void
7927 ia64_set_sched_flags (spec_info_t spec_info)
7929 unsigned int *flags = &(current_sched_info->flags);
7931 if (*flags & SCHED_RGN
7932 || *flags & SCHED_EBB
7933 || *flags & SEL_SCHED)
7935 int mask = 0;
7937 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7938 || (mflag_sched_ar_data_spec && reload_completed))
7940 mask |= BEGIN_DATA;
7942 if (!sel_sched_p ()
7943 && ((mflag_sched_br_in_data_spec && !reload_completed)
7944 || (mflag_sched_ar_in_data_spec && reload_completed)))
7945 mask |= BE_IN_DATA;
7948 if (mflag_sched_control_spec
7949 && (!sel_sched_p ()
7950 || reload_completed))
7952 mask |= BEGIN_CONTROL;
7954 if (!sel_sched_p () && mflag_sched_in_control_spec)
7955 mask |= BE_IN_CONTROL;
7958 spec_info->mask = mask;
7960 if (mask)
7962 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7964 if (mask & BE_IN_SPEC)
7965 *flags |= NEW_BBS;
7967 spec_info->flags = 0;
7969 if ((mask & CONTROL_SPEC)
7970 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7971 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7973 if (sched_verbose >= 1)
7974 spec_info->dump = sched_dump;
7975 else
7976 spec_info->dump = 0;
7978 if (mflag_sched_count_spec_in_critical_path)
7979 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7982 else
7983 spec_info->mask = 0;
7986 /* If INSN is an appropriate load, return its mode index.
7987 Return -1 otherwise. */
7988 static int
7989 get_mode_no_for_insn (rtx_insn *insn)
7991 rtx reg, mem, mode_rtx;
7992 int mode_no;
7993 bool extend_p;
7995 extract_insn_cached (insn);
7997 /* We use WHICH_ALTERNATIVE only after reload. This will
7998 guarantee that reload won't touch a speculative insn. */
8000 if (recog_data.n_operands != 2)
8001 return -1;
8003 reg = recog_data.operand[0];
8004 mem = recog_data.operand[1];
8006 /* We should use MEM's mode since REG's mode in presence of
8007 ZERO_EXTEND will always be DImode. */
8008 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8009 /* Process non-speculative ld. */
8011 if (!reload_completed)
8013 /* Do not speculate into regs like ar.lc. */
8014 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8015 return -1;
8017 if (!MEM_P (mem))
8018 return -1;
8021 rtx mem_reg = XEXP (mem, 0);
8023 if (!REG_P (mem_reg))
8024 return -1;
8027 mode_rtx = mem;
8029 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8031 gcc_assert (REG_P (reg) && MEM_P (mem));
8032 mode_rtx = mem;
8034 else
8035 return -1;
8037 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8038 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8039 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8040 /* Process speculative ld or ld.c. */
8042 gcc_assert (REG_P (reg) && MEM_P (mem));
8043 mode_rtx = mem;
8045 else
8047 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8049 if (attr_class == ITANIUM_CLASS_CHK_A
8050 || attr_class == ITANIUM_CLASS_CHK_S_I
8051 || attr_class == ITANIUM_CLASS_CHK_S_F)
8052 /* Process chk. */
8053 mode_rtx = reg;
8054 else
8055 return -1;
8058 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8060 if (mode_no == SPEC_MODE_INVALID)
8061 return -1;
8063 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8065 if (extend_p)
8067 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8068 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8069 return -1;
8071 mode_no += SPEC_GEN_EXTEND_OFFSET;
8074 return mode_no;
8077 /* If X is an unspec part of a speculative load, return its code.
8078 Return -1 otherwise. */
8079 static int
8080 get_spec_unspec_code (const_rtx x)
8082 if (GET_CODE (x) != UNSPEC)
8083 return -1;
8086 int code;
8088 code = XINT (x, 1);
8090 switch (code)
8092 case UNSPEC_LDA:
8093 case UNSPEC_LDS:
8094 case UNSPEC_LDS_A:
8095 case UNSPEC_LDSA:
8096 return code;
8098 default:
8099 return -1;
8104 /* Implement skip_rtx_p hook. */
8105 static bool
8106 ia64_skip_rtx_p (const_rtx x)
8108 return get_spec_unspec_code (x) != -1;
8111 /* If INSN is a speculative load, return its UNSPEC code.
8112 Return -1 otherwise. */
8113 static int
8114 get_insn_spec_code (const_rtx insn)
8116 rtx pat, reg, mem;
8118 pat = PATTERN (insn);
8120 if (GET_CODE (pat) == COND_EXEC)
8121 pat = COND_EXEC_CODE (pat);
8123 if (GET_CODE (pat) != SET)
8124 return -1;
8126 reg = SET_DEST (pat);
8127 if (!REG_P (reg))
8128 return -1;
8130 mem = SET_SRC (pat);
8131 if (GET_CODE (mem) == ZERO_EXTEND)
8132 mem = XEXP (mem, 0);
8134 return get_spec_unspec_code (mem);
8137 /* If INSN is a speculative load, return a ds with the speculation types.
8138 Otherwise [if INSN is a normal instruction] return 0. */
8139 static ds_t
8140 ia64_get_insn_spec_ds (rtx_insn *insn)
8142 int code = get_insn_spec_code (insn);
8144 switch (code)
8146 case UNSPEC_LDA:
8147 return BEGIN_DATA;
8149 case UNSPEC_LDS:
8150 case UNSPEC_LDS_A:
8151 return BEGIN_CONTROL;
8153 case UNSPEC_LDSA:
8154 return BEGIN_DATA | BEGIN_CONTROL;
8156 default:
8157 return 0;
8161 /* If INSN is a speculative load return a ds with the speculation types that
8162 will be checked.
8163 Otherwise [if INSN is a normal instruction] return 0. */
8164 static ds_t
8165 ia64_get_insn_checked_ds (rtx_insn *insn)
8167 int code = get_insn_spec_code (insn);
8169 switch (code)
8171 case UNSPEC_LDA:
8172 return BEGIN_DATA | BEGIN_CONTROL;
8174 case UNSPEC_LDS:
8175 return BEGIN_CONTROL;
8177 case UNSPEC_LDS_A:
8178 case UNSPEC_LDSA:
8179 return BEGIN_DATA | BEGIN_CONTROL;
8181 default:
8182 return 0;
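/* Summary of the two mappings above, restated for reference (taken
   directly from the switches):

     unspec         speculation performed       speculation checked
     UNSPEC_LDA     BEGIN_DATA                  BEGIN_DATA | BEGIN_CONTROL
     UNSPEC_LDS     BEGIN_CONTROL               BEGIN_CONTROL
     UNSPEC_LDS_A   BEGIN_CONTROL               BEGIN_DATA | BEGIN_CONTROL
     UNSPEC_LDSA    BEGIN_DATA | BEGIN_CONTROL  BEGIN_DATA | BEGIN_CONTROL  */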
8186 /* Return a speculative load pattern for INSN with speculation types TS
8187 and mode index MODE_NO. The new pattern is built from INSN's current
8188 operands; if the original pattern is predicated (COND_EXEC), the
8189 predicate is preserved. */
8190 static rtx
8191 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8193 rtx pat, new_pat;
8194 gen_func_t gen_load;
8196 gen_load = get_spec_load_gen_function (ts, mode_no);
8198 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8199 copy_rtx (recog_data.operand[1]));
8201 pat = PATTERN (insn);
8202 if (GET_CODE (pat) == COND_EXEC)
8203 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8204 new_pat);
8206 return new_pat;
8209 static bool
8210 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8211 ds_t ds ATTRIBUTE_UNUSED)
8213 return false;
8216 /* Implement targetm.sched.speculate_insn hook.
8217 Check if the INSN can be TS speculative.
8218 If 'no' - return -1.
8219 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8220 If current pattern of the INSN already provides TS speculation,
8221 return 0. */
8222 static int
8223 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8225 int mode_no;
8226 int res;
8228 gcc_assert (!(ts & ~SPECULATIVE));
8230 if (ia64_spec_check_p (insn))
8231 return -1;
8233 if ((ts & BE_IN_SPEC)
8234 && !insn_can_be_in_speculative_p (insn, ts))
8235 return -1;
8237 mode_no = get_mode_no_for_insn (insn);
8239 if (mode_no != SPEC_MODE_INVALID)
8241 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8242 res = 0;
8243 else
8245 res = 1;
8246 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8249 else
8250 res = -1;
8252 return res;
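/* A minimal sketch of how the speculate_insn hook above is meant to be
   driven (hypothetical caller for illustration; the scheduler's own
   speculation machinery does the real work and bookkeeping):

     rtx new_pat;
     switch (ia64_speculate_insn (insn, ts, &new_pat))
       {
       case -1:  - INSN cannot be made TS-speculative; leave it alone.
         break;
       case 0:   - INSN already carries the requested speculation.
         break;
       case 1:   - Replace INSN's pattern with the speculative one, e.g.
         validate_change (insn, &PATTERN (insn), new_pat, true);
         break;
       }

   validate_change is used here only to illustrate applying the returned
   pattern; it is not what the scheduler necessarily calls.  */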
8255 /* Return a function that will generate a check for speculation TS with mode
8256 MODE_NO.
8257 If simple check is needed, pass true for SIMPLE_CHECK_P.
8258 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8259 static gen_func_t
8260 get_spec_check_gen_function (ds_t ts, int mode_no,
8261 bool simple_check_p, bool clearing_check_p)
8263 static gen_func_t gen_ld_c_clr[] = {
8264 gen_movbi_clr,
8265 gen_movqi_clr,
8266 gen_movhi_clr,
8267 gen_movsi_clr,
8268 gen_movdi_clr,
8269 gen_movsf_clr,
8270 gen_movdf_clr,
8271 gen_movxf_clr,
8272 gen_movti_clr,
8273 gen_zero_extendqidi2_clr,
8274 gen_zero_extendhidi2_clr,
8275 gen_zero_extendsidi2_clr,
8277 static gen_func_t gen_ld_c_nc[] = {
8278 gen_movbi_nc,
8279 gen_movqi_nc,
8280 gen_movhi_nc,
8281 gen_movsi_nc,
8282 gen_movdi_nc,
8283 gen_movsf_nc,
8284 gen_movdf_nc,
8285 gen_movxf_nc,
8286 gen_movti_nc,
8287 gen_zero_extendqidi2_nc,
8288 gen_zero_extendhidi2_nc,
8289 gen_zero_extendsidi2_nc,
8291 static gen_func_t gen_chk_a_clr[] = {
8292 gen_advanced_load_check_clr_bi,
8293 gen_advanced_load_check_clr_qi,
8294 gen_advanced_load_check_clr_hi,
8295 gen_advanced_load_check_clr_si,
8296 gen_advanced_load_check_clr_di,
8297 gen_advanced_load_check_clr_sf,
8298 gen_advanced_load_check_clr_df,
8299 gen_advanced_load_check_clr_xf,
8300 gen_advanced_load_check_clr_ti,
8301 gen_advanced_load_check_clr_di,
8302 gen_advanced_load_check_clr_di,
8303 gen_advanced_load_check_clr_di,
8305 static gen_func_t gen_chk_a_nc[] = {
8306 gen_advanced_load_check_nc_bi,
8307 gen_advanced_load_check_nc_qi,
8308 gen_advanced_load_check_nc_hi,
8309 gen_advanced_load_check_nc_si,
8310 gen_advanced_load_check_nc_di,
8311 gen_advanced_load_check_nc_sf,
8312 gen_advanced_load_check_nc_df,
8313 gen_advanced_load_check_nc_xf,
8314 gen_advanced_load_check_nc_ti,
8315 gen_advanced_load_check_nc_di,
8316 gen_advanced_load_check_nc_di,
8317 gen_advanced_load_check_nc_di,
8319 static gen_func_t gen_chk_s[] = {
8320 gen_speculation_check_bi,
8321 gen_speculation_check_qi,
8322 gen_speculation_check_hi,
8323 gen_speculation_check_si,
8324 gen_speculation_check_di,
8325 gen_speculation_check_sf,
8326 gen_speculation_check_df,
8327 gen_speculation_check_xf,
8328 gen_speculation_check_ti,
8329 gen_speculation_check_di,
8330 gen_speculation_check_di,
8331 gen_speculation_check_di,
8334 gen_func_t *gen_check;
8336 if (ts & BEGIN_DATA)
8338 /* We don't need recovery because even if this is ld.sa,
8339 an ALAT entry will be allocated only if the NAT bit is set to zero.
8340 So it is enough to use ld.c here. */
8342 if (simple_check_p)
8344 gcc_assert (mflag_sched_spec_ldc);
8346 if (clearing_check_p)
8347 gen_check = gen_ld_c_clr;
8348 else
8349 gen_check = gen_ld_c_nc;
8351 else
8353 if (clearing_check_p)
8354 gen_check = gen_chk_a_clr;
8355 else
8356 gen_check = gen_chk_a_nc;
8359 else if (ts & BEGIN_CONTROL)
8361 if (simple_check_p)
8362 /* We might want to use ld.sa -> ld.c instead of
8363 ld.s -> chk.s. */
8365 gcc_assert (!ia64_needs_block_p (ts));
8367 if (clearing_check_p)
8368 gen_check = gen_ld_c_clr;
8369 else
8370 gen_check = gen_ld_c_nc;
8372 else
8374 gen_check = gen_chk_s;
8377 else
8378 gcc_unreachable ();
8380 gcc_assert (mode_no >= 0);
8381 return gen_check[mode_no];
8384 /* Return nonzero if a speculation of type TS needs a branchy recovery check. */
8385 static bool
8386 ia64_needs_block_p (ds_t ts)
8388 if (ts & BEGIN_DATA)
8389 return !mflag_sched_spec_ldc;
8391 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8393 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8396 /* Generate (or regenerate) a recovery check for INSN. */
8397 static rtx
8398 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8400 rtx op1, pat, check_pat;
8401 gen_func_t gen_check;
8402 int mode_no;
8404 mode_no = get_mode_no_for_insn (insn);
8405 gcc_assert (mode_no >= 0);
8407 if (label)
8408 op1 = label;
8409 else
8411 gcc_assert (!ia64_needs_block_p (ds));
8412 op1 = copy_rtx (recog_data.operand[1]);
8415 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8416 true);
8418 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8420 pat = PATTERN (insn);
8421 if (GET_CODE (pat) == COND_EXEC)
8422 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8423 check_pat);
8425 return check_pat;
8428 /* Return nonzero if X is a branchy recovery check. */
8429 static int
8430 ia64_spec_check_p (rtx x)
8432 x = PATTERN (x);
8433 if (GET_CODE (x) == COND_EXEC)
8434 x = COND_EXEC_CODE (x);
8435 if (GET_CODE (x) == SET)
8436 return ia64_spec_check_src_p (SET_SRC (x));
8437 return 0;
8440 /* Return nonzero if SRC belongs to a recovery check. */
8441 static int
8442 ia64_spec_check_src_p (rtx src)
8444 if (GET_CODE (src) == IF_THEN_ELSE)
8446 rtx t;
8448 t = XEXP (src, 0);
8449 if (GET_CODE (t) == NE)
8451 t = XEXP (t, 0);
8453 if (GET_CODE (t) == UNSPEC)
8455 int code;
8457 code = XINT (t, 1);
8459 if (code == UNSPEC_LDCCLR
8460 || code == UNSPEC_LDCNC
8461 || code == UNSPEC_CHKACLR
8462 || code == UNSPEC_CHKANC
8463 || code == UNSPEC_CHKS)
8465 gcc_assert (code != 0);
8466 return code;
8471 return 0;
8475 /* The following page contains abstract data `bundle states' which are
8476 used for bundling insns (inserting nops and template generation). */
8478 /* The following describes state of insn bundling. */
8480 struct bundle_state
8482 /* Unique bundle state number to identify the state in the debugging
8483 output. */
8484 int unique_num;
8485 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8486 /* number of nops before and after the insn */
8487 short before_nops_num, after_nops_num;
8488 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8489 insn) */
8490 int cost; /* cost of the state in cycles */
8491 int accumulated_insns_num; /* number of all previous insns including
8492 nops; an L insn is counted as 2 insns */
8493 int branch_deviation; /* deviation of previous branches from 3rd slots */
8494 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8495 struct bundle_state *next; /* next state with the same insn_num */
8496 struct bundle_state *originator; /* originator (previous insn state) */
8497 /* All bundle states are in the following chain. */
8498 struct bundle_state *allocated_states_chain;
8499 /* The DFA State after issuing the insn and the nops. */
8500 state_t dfa_state;
8503 /* The following maps an insn number to the corresponding bundle state. */
8505 static struct bundle_state **index_to_bundle_states;
8507 /* The unique number of next bundle state. */
8509 static int bundle_states_num;
8511 /* All allocated bundle states are in the following chain. */
8513 static struct bundle_state *allocated_bundle_states_chain;
8515 /* All allocated but not used bundle states are in the following
8516 chain. */
8518 static struct bundle_state *free_bundle_state_chain;
8521 /* The following function returns a free bundle state. */
8523 static struct bundle_state *
8524 get_free_bundle_state (void)
8526 struct bundle_state *result;
8528 if (free_bundle_state_chain != NULL)
8530 result = free_bundle_state_chain;
8531 free_bundle_state_chain = result->next;
8533 else
8535 result = XNEW (struct bundle_state);
8536 result->dfa_state = xmalloc (dfa_state_size);
8537 result->allocated_states_chain = allocated_bundle_states_chain;
8538 allocated_bundle_states_chain = result;
8540 result->unique_num = bundle_states_num++;
8541 return result;
8545 /* The following function frees given bundle state. */
8547 static void
8548 free_bundle_state (struct bundle_state *state)
8550 state->next = free_bundle_state_chain;
8551 free_bundle_state_chain = state;
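/* The two functions above implement a simple free list: states are never
   returned to the heap individually.  "Freeing" just pushes the state onto
   free_bundle_state_chain for reuse by get_free_bundle_state, and
   everything is actually released only by finish_bundle_states through
   allocated_bundle_states_chain.  The dfa_state buffer allocated in
   get_free_bundle_state is reused along with the state.  */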
8554 /* Start work with abstract data `bundle states'. */
8556 static void
8557 initiate_bundle_states (void)
8559 bundle_states_num = 0;
8560 free_bundle_state_chain = NULL;
8561 allocated_bundle_states_chain = NULL;
8564 /* Finish work with abstract data `bundle states'. */
8566 static void
8567 finish_bundle_states (void)
8569 struct bundle_state *curr_state, *next_state;
8571 for (curr_state = allocated_bundle_states_chain;
8572 curr_state != NULL;
8573 curr_state = next_state)
8575 next_state = curr_state->allocated_states_chain;
8576 free (curr_state->dfa_state);
8577 free (curr_state);
8581 /* Hashtable helpers. */
8583 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8585 static inline hashval_t hash (const bundle_state *);
8586 static inline bool equal (const bundle_state *, const bundle_state *);
8589 /* The function returns hash of BUNDLE_STATE. */
8591 inline hashval_t
8592 bundle_state_hasher::hash (const bundle_state *state)
8594 unsigned result, i;
8596 for (result = i = 0; i < dfa_state_size; i++)
8597 result += (((unsigned char *) state->dfa_state) [i]
8598 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8599 return result + state->insn_num;
8602 /* The function returns nonzero if the bundle state keys are equal. */
8604 inline bool
8605 bundle_state_hasher::equal (const bundle_state *state1,
8606 const bundle_state *state2)
8608 return (state1->insn_num == state2->insn_num
8609 && memcmp (state1->dfa_state, state2->dfa_state,
8610 dfa_state_size) == 0);
8613 /* Hash table of the bundle states. The key is dfa_state and insn_num
8614 of the bundle states. */
8616 static hash_table<bundle_state_hasher> *bundle_state_table;
8618 /* The function inserts the BUNDLE_STATE into the hash table. The
8619 function returns nonzero if the bundle has been inserted into the
8620 table. The table contains the best bundle state with given key. */
8622 static int
8623 insert_bundle_state (struct bundle_state *bundle_state)
8625 struct bundle_state **entry_ptr;
8627 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8628 if (*entry_ptr == NULL)
8630 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8631 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8632 *entry_ptr = bundle_state;
8633 return TRUE;
8635 else if (bundle_state->cost < (*entry_ptr)->cost
8636 || (bundle_state->cost == (*entry_ptr)->cost
8637 && ((*entry_ptr)->accumulated_insns_num
8638 > bundle_state->accumulated_insns_num
8639 || ((*entry_ptr)->accumulated_insns_num
8640 == bundle_state->accumulated_insns_num
8641 && ((*entry_ptr)->branch_deviation
8642 > bundle_state->branch_deviation
8643 || ((*entry_ptr)->branch_deviation
8644 == bundle_state->branch_deviation
8645 && (*entry_ptr)->middle_bundle_stops
8646 > bundle_state->middle_bundle_stops))))))
8649 struct bundle_state temp;
8651 temp = **entry_ptr;
8652 **entry_ptr = *bundle_state;
8653 (*entry_ptr)->next = temp.next;
8654 *bundle_state = temp;
8656 return FALSE;
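/* The comparison in insert_bundle_state above is a lexicographic
   "better than" test on the tuple

     (cost, accumulated_insns_num, branch_deviation, middle_bundle_stops)

   i.e. roughly (illustrative restatement with shortened names only):

     better_p = cost < old->cost
                || (cost == old->cost
                    && (insns < old->insns
                        || (insns == old->insns
                            && (branch_dev < old->branch_dev
                                || (branch_dev == old->branch_dev
                                    && mid_stops < old->mid_stops)))));

   When the new state wins, its contents are swapped into the existing
   table entry so that the entry's chain link (next) is preserved.  */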
8659 /* Start work with the hash table. */
8661 static void
8662 initiate_bundle_state_table (void)
8664 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8667 /* Finish work with the hash table. */
8669 static void
8670 finish_bundle_state_table (void)
8672 delete bundle_state_table;
8673 bundle_state_table = NULL;
8678 /* The following variable is an insn `nop' used to check bundle states
8679 with different numbers of inserted nops. */
8681 static rtx_insn *ia64_nop;
8683 /* The following function tries to issue NOPS_NUM nops for the current
8684 state without advancing the processor cycle. If it fails, the
8685 function returns FALSE and frees the current state. */
8687 static int
8688 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8690 int i;
8692 for (i = 0; i < nops_num; i++)
8693 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8695 free_bundle_state (curr_state);
8696 return FALSE;
8698 return TRUE;
8701 /* The following function tries to issue INSN for the current
8702 state without advancing the processor cycle. If it fails, the
8703 function returns FALSE and frees the current state. */
8705 static int
8706 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8708 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8710 free_bundle_state (curr_state);
8711 return FALSE;
8713 return TRUE;
8716 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN,
8717 starting with ORIGINATOR, without advancing the processor cycle. If
8718 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8719 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8720 If it is successful, the function creates a new bundle state and
8721 inserts it into the hash table and into `index_to_bundle_states'. */
8723 static void
8724 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8725 rtx_insn *insn, int try_bundle_end_p,
8726 int only_bundle_end_p)
8728 struct bundle_state *curr_state;
8730 curr_state = get_free_bundle_state ();
8731 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8732 curr_state->insn = insn;
8733 curr_state->insn_num = originator->insn_num + 1;
8734 curr_state->cost = originator->cost;
8735 curr_state->originator = originator;
8736 curr_state->before_nops_num = before_nops_num;
8737 curr_state->after_nops_num = 0;
8738 curr_state->accumulated_insns_num
8739 = originator->accumulated_insns_num + before_nops_num;
8740 curr_state->branch_deviation = originator->branch_deviation;
8741 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8742 gcc_assert (insn);
8743 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8745 gcc_assert (GET_MODE (insn) != TImode);
8746 if (!try_issue_nops (curr_state, before_nops_num))
8747 return;
8748 if (!try_issue_insn (curr_state, insn))
8749 return;
8750 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8751 if (curr_state->accumulated_insns_num % 3 != 0)
8752 curr_state->middle_bundle_stops++;
8753 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8754 && curr_state->accumulated_insns_num % 3 != 0)
8756 free_bundle_state (curr_state);
8757 return;
8760 else if (GET_MODE (insn) != TImode)
8762 if (!try_issue_nops (curr_state, before_nops_num))
8763 return;
8764 if (!try_issue_insn (curr_state, insn))
8765 return;
8766 curr_state->accumulated_insns_num++;
8767 gcc_assert (!unknown_for_bundling_p (insn));
8769 if (ia64_safe_type (insn) == TYPE_L)
8770 curr_state->accumulated_insns_num++;
8772 else
8774 /* If this is an insn that must be first in a group, then don't allow
8775 nops to be emitted before it. Currently, alloc is the only such
8776 supported instruction. */
8777 /* ??? The bundling automatons should handle this for us, but they do
8778 not yet have support for the first_insn attribute. */
8779 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8781 free_bundle_state (curr_state);
8782 return;
8785 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8786 state_transition (curr_state->dfa_state, NULL);
8787 curr_state->cost++;
8788 if (!try_issue_nops (curr_state, before_nops_num))
8789 return;
8790 if (!try_issue_insn (curr_state, insn))
8791 return;
8792 curr_state->accumulated_insns_num++;
8793 if (unknown_for_bundling_p (insn))
8795 /* Finish bundle containing asm insn. */
8796 curr_state->after_nops_num
8797 = 3 - curr_state->accumulated_insns_num % 3;
8798 curr_state->accumulated_insns_num
8799 += 3 - curr_state->accumulated_insns_num % 3;
8801 else if (ia64_safe_type (insn) == TYPE_L)
8802 curr_state->accumulated_insns_num++;
8804 if (ia64_safe_type (insn) == TYPE_B)
8805 curr_state->branch_deviation
8806 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8807 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8809 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8811 state_t dfa_state;
8812 struct bundle_state *curr_state1;
8813 struct bundle_state *allocated_states_chain;
8815 curr_state1 = get_free_bundle_state ();
8816 dfa_state = curr_state1->dfa_state;
8817 allocated_states_chain = curr_state1->allocated_states_chain;
8818 *curr_state1 = *curr_state;
8819 curr_state1->dfa_state = dfa_state;
8820 curr_state1->allocated_states_chain = allocated_states_chain;
8821 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8822 dfa_state_size);
8823 curr_state = curr_state1;
8825 if (!try_issue_nops (curr_state,
8826 3 - curr_state->accumulated_insns_num % 3))
8827 return;
8828 curr_state->after_nops_num
8829 = 3 - curr_state->accumulated_insns_num % 3;
8830 curr_state->accumulated_insns_num
8831 += 3 - curr_state->accumulated_insns_num % 3;
8833 if (!insert_bundle_state (curr_state))
8834 free_bundle_state (curr_state);
8835 return;
8838 /* The following function returns the position in the two-bundle window
8839 for the given STATE. */
8841 static int
8842 get_max_pos (state_t state)
8844 if (cpu_unit_reservation_p (state, pos_6))
8845 return 6;
8846 else if (cpu_unit_reservation_p (state, pos_5))
8847 return 5;
8848 else if (cpu_unit_reservation_p (state, pos_4))
8849 return 4;
8850 else if (cpu_unit_reservation_p (state, pos_3))
8851 return 3;
8852 else if (cpu_unit_reservation_p (state, pos_2))
8853 return 2;
8854 else if (cpu_unit_reservation_p (state, pos_1))
8855 return 1;
8856 else
8857 return 0;
8860 /* The function returns the code of a possible template for the given
8861 position and state. It should be called only with the two position
8862 values 3 or 6. We avoid generating F NOPs by putting templates
8863 containing F insns at the end of the template search, because of an
8864 undocumented anomaly in McKinley-derived cores which can cause stalls
8865 if an F-unit insn (including a NOP) is issued within a six-cycle
8866 window after reading certain application registers (such as ar.bsp).
8867 Furthermore, power considerations also argue against the use of
8868 F-unit instructions unless they're really needed. */
8870 static int
8871 get_template (state_t state, int pos)
8873 switch (pos)
8875 case 3:
8876 if (cpu_unit_reservation_p (state, _0mmi_))
8877 return 1;
8878 else if (cpu_unit_reservation_p (state, _0mii_))
8879 return 0;
8880 else if (cpu_unit_reservation_p (state, _0mmb_))
8881 return 7;
8882 else if (cpu_unit_reservation_p (state, _0mib_))
8883 return 6;
8884 else if (cpu_unit_reservation_p (state, _0mbb_))
8885 return 5;
8886 else if (cpu_unit_reservation_p (state, _0bbb_))
8887 return 4;
8888 else if (cpu_unit_reservation_p (state, _0mmf_))
8889 return 3;
8890 else if (cpu_unit_reservation_p (state, _0mfi_))
8891 return 2;
8892 else if (cpu_unit_reservation_p (state, _0mfb_))
8893 return 8;
8894 else if (cpu_unit_reservation_p (state, _0mlx_))
8895 return 9;
8896 else
8897 gcc_unreachable ();
8898 case 6:
8899 if (cpu_unit_reservation_p (state, _1mmi_))
8900 return 1;
8901 else if (cpu_unit_reservation_p (state, _1mii_))
8902 return 0;
8903 else if (cpu_unit_reservation_p (state, _1mmb_))
8904 return 7;
8905 else if (cpu_unit_reservation_p (state, _1mib_))
8906 return 6;
8907 else if (cpu_unit_reservation_p (state, _1mbb_))
8908 return 5;
8909 else if (cpu_unit_reservation_p (state, _1bbb_))
8910 return 4;
8911 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8912 return 3;
8913 else if (cpu_unit_reservation_p (state, _1mfi_))
8914 return 2;
8915 else if (cpu_unit_reservation_p (state, _1mfb_))
8916 return 8;
8917 else if (cpu_unit_reservation_p (state, _1mlx_))
8918 return 9;
8919 else
8920 gcc_unreachable ();
8921 default:
8922 gcc_unreachable ();
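/* For reference, the codes returned above appear to index the IA-64
   bundle templates in the order suggested by the reservation unit names
   (_0mii_, _0mmi_, ...); this listing is inferred from those names rather
   than stated authoritatively:

     0 .mii   1 .mmi   2 .mfi   3 .mmf   4 .bbb
     5 .mbb   6 .mib   7 .mmb   8 .mfb   9 .mlx  */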
8926 /* True when INSN is important for bundling. */
8928 static bool
8929 important_for_bundling_p (rtx_insn *insn)
8931 return (INSN_P (insn)
8932 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8933 && GET_CODE (PATTERN (insn)) != USE
8934 && GET_CODE (PATTERN (insn)) != CLOBBER);
8937 /* The following function returns the first insn important for insn
8938 bundling in the range from INSN up to (but not including) TAIL. */
8940 static rtx_insn *
8941 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8943 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8944 if (important_for_bundling_p (insn))
8945 return insn;
8946 return NULL;
8949 /* True when INSN is unknown, but important, for bundling. */
8951 static bool
8952 unknown_for_bundling_p (rtx_insn *insn)
8954 return (INSN_P (insn)
8955 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8956 && GET_CODE (PATTERN (insn)) != USE
8957 && GET_CODE (PATTERN (insn)) != CLOBBER);
8960 /* Add a bundle selector TEMPLATE0 before INSN. */
8962 static void
8963 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8965 rtx b = gen_bundle_selector (GEN_INT (template0));
8967 ia64_emit_insn_before (b, insn);
8968 #if NR_BUNDLES == 10
8969 if ((template0 == 4 || template0 == 5)
8970 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8972 int i;
8973 rtx note = NULL_RTX;
8975 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8976 first or second slot. If it is and has a REG_EH_REGION note, copy
8977 the note to the following nops, as br.call sets rp to the address of
8978 the following bundle and therefore an EH region end must be on a
8979 bundle boundary. */
8980 insn = PREV_INSN (insn);
8981 for (i = 0; i < 3; i++)
8984 insn = next_active_insn (insn);
8985 while (NONJUMP_INSN_P (insn)
8986 && get_attr_empty (insn) == EMPTY_YES);
8987 if (CALL_P (insn))
8988 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8989 else if (note)
8991 int code;
8993 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8994 || code == CODE_FOR_nop_b);
8995 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8996 note = NULL_RTX;
8997 else
8998 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9002 #endif
9005 /* The following function does insn bundling. Bundling means
9006 inserting templates and nop insns to fit insn groups into permitted
9007 templates. Instruction scheduling uses an NDFA (non-deterministic
9008 finite automaton) encoding information about the templates and the
9009 inserted nops. Nondeterminism of the automaton permits following
9010 all possible insn sequences very quickly.
9012 Unfortunately it is not possible to get information about inserted
9013 nop insns and used templates from the automaton states. The
9014 automaton only says that we can issue an insn, possibly inserting
9015 some nops before it and using some template. Therefore insn
9016 bundling in this function is implemented by using a DFA
9017 (deterministic finite automaton). We follow all possible insn
9018 sequences by inserting 0-2 nops (that is what the NDFA describes for
9019 insn scheduling) before/after each insn being bundled. We know the
9020 start of a simulated processor cycle from insn scheduling (an insn
9021 starting a new cycle has TImode).
9023 A simple implementation of insn bundling would create an enormous
9024 number of possible insn sequences satisfying the information about
9025 new cycle ticks taken from the insn scheduling. To make the
9026 algorithm practical we use dynamic programming. Each decision
9027 (about inserting nops and implicitly about previous decisions) is
9028 described by the structure bundle_state (see above). If we generate
9029 the same bundle state (the key is the automaton state after issuing
9030 the insns and nops for it), we reuse the already generated one. As
9031 a consequence we reject some decisions which cannot improve the
9032 solution and reduce the memory used by the algorithm.
9034 When we reach the end of the EBB (extended basic block), we choose
9035 the best sequence and then, moving backward through the EBB, insert
9036 templates for the best alternative. The templates are taken by
9037 querying the automaton state for each insn in the chosen bundle states.
9039 So the algorithm makes two (forward and backward) passes through
9040 the EBB. */
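/* A compressed sketch of the dynamic program described above (pseudo-C
   for illustration only; the real code below also handles group barriers,
   asm insns, L-type insns and bundle-end nops):

     index_to_bundle_states[0] = the initial empty state;
     for each important insn I, numbered n = 1, 2, ...
       for each state S in index_to_bundle_states[n - 1]
         for nops_before in { 2, 1, 0 }   (2 only for F/B/L/S insns)
           try to issue nops_before nops and then I starting from S;
           if the DFA accepts, build a new state S' and call
           insert_bundle_state (S'), which keeps only the best state
           for each (dfa_state, insn_num) key;
     pick the best state in index_to_bundle_states[last] whose final
     bundle is completely filled, then walk its originator chain
     backward, emitting the recorded nops and bundle selector
     templates.  */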
9042 static void
9043 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9045 struct bundle_state *curr_state, *next_state, *best_state;
9046 rtx_insn *insn, *next_insn;
9047 int insn_num;
9048 int i, bundle_end_p, only_bundle_end_p, asm_p;
9049 int pos = 0, max_pos, template0, template1;
9050 rtx_insn *b;
9051 enum attr_type type;
9053 insn_num = 0;
9054 /* Count insns in the EBB. */
9055 for (insn = NEXT_INSN (prev_head_insn);
9056 insn && insn != tail;
9057 insn = NEXT_INSN (insn))
9058 if (INSN_P (insn))
9059 insn_num++;
9060 if (insn_num == 0)
9061 return;
9062 bundling_p = 1;
9063 dfa_clean_insn_cache ();
9064 initiate_bundle_state_table ();
9065 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9066 /* First (forward) pass -- generation of bundle states. */
9067 curr_state = get_free_bundle_state ();
9068 curr_state->insn = NULL;
9069 curr_state->before_nops_num = 0;
9070 curr_state->after_nops_num = 0;
9071 curr_state->insn_num = 0;
9072 curr_state->cost = 0;
9073 curr_state->accumulated_insns_num = 0;
9074 curr_state->branch_deviation = 0;
9075 curr_state->middle_bundle_stops = 0;
9076 curr_state->next = NULL;
9077 curr_state->originator = NULL;
9078 state_reset (curr_state->dfa_state);
9079 index_to_bundle_states [0] = curr_state;
9080 insn_num = 0;
9081 /* Shift cycle mark if it is put on insn which could be ignored. */
9082 for (insn = NEXT_INSN (prev_head_insn);
9083 insn != tail;
9084 insn = NEXT_INSN (insn))
9085 if (INSN_P (insn)
9086 && !important_for_bundling_p (insn)
9087 && GET_MODE (insn) == TImode)
9089 PUT_MODE (insn, VOIDmode);
9090 for (next_insn = NEXT_INSN (insn);
9091 next_insn != tail;
9092 next_insn = NEXT_INSN (next_insn))
9093 if (important_for_bundling_p (next_insn)
9094 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9096 PUT_MODE (next_insn, TImode);
9097 break;
9100 /* Forward pass: generation of bundle states. */
9101 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9102 insn != NULL_RTX;
9103 insn = next_insn)
9105 gcc_assert (important_for_bundling_p (insn));
9106 type = ia64_safe_type (insn);
9107 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9108 insn_num++;
9109 index_to_bundle_states [insn_num] = NULL;
9110 for (curr_state = index_to_bundle_states [insn_num - 1];
9111 curr_state != NULL;
9112 curr_state = next_state)
9114 pos = curr_state->accumulated_insns_num % 3;
9115 next_state = curr_state->next;
9116 /* We must fill up the current bundle in order to start a
9117 subsequent asm insn in a new bundle. Asm insn is always
9118 placed in a separate bundle. */
9119 only_bundle_end_p
9120 = (next_insn != NULL_RTX
9121 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9122 && unknown_for_bundling_p (next_insn));
9123 /* We may fill up the current bundle if it is the cycle end
9124 without a group barrier. */
9125 bundle_end_p
9126 = (only_bundle_end_p || next_insn == NULL_RTX
9127 || (GET_MODE (next_insn) == TImode
9128 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9129 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9130 || type == TYPE_S)
9131 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9132 only_bundle_end_p);
9133 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9134 only_bundle_end_p);
9135 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9136 only_bundle_end_p);
9138 gcc_assert (index_to_bundle_states [insn_num]);
9139 for (curr_state = index_to_bundle_states [insn_num];
9140 curr_state != NULL;
9141 curr_state = curr_state->next)
9142 if (verbose >= 2 && dump)
9144 /* This structure is taken from the generated code of the
9145 pipeline hazard recognizer (see file insn-attrtab.c).
9146 Please don't forget to change the structure if a new
9147 automaton is added to the .md file. */
9148 struct DFA_chip
9150 unsigned short one_automaton_state;
9151 unsigned short oneb_automaton_state;
9152 unsigned short two_automaton_state;
9153 unsigned short twob_automaton_state;
9156 fprintf
9157 (dump,
9158 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9159 curr_state->unique_num,
9160 (curr_state->originator == NULL
9161 ? -1 : curr_state->originator->unique_num),
9162 curr_state->cost,
9163 curr_state->before_nops_num, curr_state->after_nops_num,
9164 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9165 curr_state->middle_bundle_stops,
9166 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9167 INSN_UID (insn));
9171 /* We should find a solution because the 2nd insn scheduling has
9172 found one. */
9173 gcc_assert (index_to_bundle_states [insn_num]);
9174 /* Find a state corresponding to the best insn sequence. */
9175 best_state = NULL;
9176 for (curr_state = index_to_bundle_states [insn_num];
9177 curr_state != NULL;
9178 curr_state = curr_state->next)
9179 /* We only look at states whose last bundle is completely filled.
9180 First we prefer insn sequences with minimal cost, then with
9181 minimal inserted nops, and finally with branch insns placed in
9182 the 3rd slots. */
9183 if (curr_state->accumulated_insns_num % 3 == 0
9184 && (best_state == NULL || best_state->cost > curr_state->cost
9185 || (best_state->cost == curr_state->cost
9186 && (curr_state->accumulated_insns_num
9187 < best_state->accumulated_insns_num
9188 || (curr_state->accumulated_insns_num
9189 == best_state->accumulated_insns_num
9190 && (curr_state->branch_deviation
9191 < best_state->branch_deviation
9192 || (curr_state->branch_deviation
9193 == best_state->branch_deviation
9194 && curr_state->middle_bundle_stops
9195 < best_state->middle_bundle_stops)))))))
9196 best_state = curr_state;
9197 /* Second (backward) pass: adding nops and templates. */
9198 gcc_assert (best_state);
9199 insn_num = best_state->before_nops_num;
9200 template0 = template1 = -1;
9201 for (curr_state = best_state;
9202 curr_state->originator != NULL;
9203 curr_state = curr_state->originator)
9205 insn = curr_state->insn;
9206 asm_p = unknown_for_bundling_p (insn);
9207 insn_num++;
9208 if (verbose >= 2 && dump)
9210 struct DFA_chip
9212 unsigned short one_automaton_state;
9213 unsigned short oneb_automaton_state;
9214 unsigned short two_automaton_state;
9215 unsigned short twob_automaton_state;
9218 fprintf
9219 (dump,
9220 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9221 curr_state->unique_num,
9222 (curr_state->originator == NULL
9223 ? -1 : curr_state->originator->unique_num),
9224 curr_state->cost,
9225 curr_state->before_nops_num, curr_state->after_nops_num,
9226 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9227 curr_state->middle_bundle_stops,
9228 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9229 INSN_UID (insn));
9231 /* Find the position in the current bundle window. The window can
9232 contain at most two bundles. A two-bundle window means that
9233 the processor will make two bundle rotations. */
9234 max_pos = get_max_pos (curr_state->dfa_state);
9235 if (max_pos == 6
9236 /* The following (negative template number) means that the
9237 processor did one bundle rotation. */
9238 || (max_pos == 3 && template0 < 0))
9240 /* We are at the end of the window -- find template(s) for
9241 its bundle(s). */
9242 pos = max_pos;
9243 if (max_pos == 3)
9244 template0 = get_template (curr_state->dfa_state, 3);
9245 else
9247 template1 = get_template (curr_state->dfa_state, 3);
9248 template0 = get_template (curr_state->dfa_state, 6);
9251 if (max_pos > 3 && template1 < 0)
9252 /* This may happen when we have a stop inside a bundle. */
9254 gcc_assert (pos <= 3);
9255 template1 = get_template (curr_state->dfa_state, 3);
9256 pos += 3;
9258 if (!asm_p)
9259 /* Emit nops after the current insn. */
9260 for (i = 0; i < curr_state->after_nops_num; i++)
9262 rtx nop_pat = gen_nop ();
9263 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9264 pos--;
9265 gcc_assert (pos >= 0);
9266 if (pos % 3 == 0)
9268 /* We are at the start of a bundle: emit the template
9269 (it should be defined). */
9270 gcc_assert (template0 >= 0);
9271 ia64_add_bundle_selector_before (template0, nop);
9272 /* If we have a two-bundle window, we make one bundle
9273 rotation. Otherwise template0 will be undefined
9274 (a negative value). */
9275 template0 = template1;
9276 template1 = -1;
9279 /* Move the position backward in the window. A group barrier has
9280 no slot. An asm insn takes a whole bundle. */
9281 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9282 && !unknown_for_bundling_p (insn))
9283 pos--;
9284 /* Long insn takes 2 slots. */
9285 if (ia64_safe_type (insn) == TYPE_L)
9286 pos--;
9287 gcc_assert (pos >= 0);
9288 if (pos % 3 == 0
9289 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9290 && !unknown_for_bundling_p (insn))
9292 /* The current insn is at the bundle start: emit the
9293 template. */
9294 gcc_assert (template0 >= 0);
9295 ia64_add_bundle_selector_before (template0, insn);
9296 b = PREV_INSN (insn);
9297 insn = b;
9298 /* See comment above in analogous place for emitting nops
9299 after the insn. */
9300 template0 = template1;
9301 template1 = -1;
9303 /* Emit nops before the current insn. */
9304 for (i = 0; i < curr_state->before_nops_num; i++)
9306 rtx nop_pat = gen_nop ();
9307 ia64_emit_insn_before (nop_pat, insn);
9308 rtx_insn *nop = PREV_INSN (insn);
9309 insn = nop;
9310 pos--;
9311 gcc_assert (pos >= 0);
9312 if (pos % 3 == 0)
9314 /* See comment above in analogous place for emitting nops
9315 after the insn. */
9316 gcc_assert (template0 >= 0);
9317 ia64_add_bundle_selector_before (template0, insn);
9318 b = PREV_INSN (insn);
9319 insn = b;
9320 template0 = template1;
9321 template1 = -1;
9326 #ifdef ENABLE_CHECKING
9328 /* Verify that middle_bundle_stops was calculated correctly. */
9329 int num = best_state->middle_bundle_stops;
9330 bool start_bundle = true, end_bundle = false;
9332 for (insn = NEXT_INSN (prev_head_insn);
9333 insn && insn != tail;
9334 insn = NEXT_INSN (insn))
9336 if (!INSN_P (insn))
9337 continue;
9338 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9339 start_bundle = true;
9340 else
9342 rtx_insn *next_insn;
9344 for (next_insn = NEXT_INSN (insn);
9345 next_insn && next_insn != tail;
9346 next_insn = NEXT_INSN (next_insn))
9347 if (INSN_P (next_insn)
9348 && (ia64_safe_itanium_class (next_insn)
9349 != ITANIUM_CLASS_IGNORE
9350 || recog_memoized (next_insn)
9351 == CODE_FOR_bundle_selector)
9352 && GET_CODE (PATTERN (next_insn)) != USE
9353 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9354 break;
9356 end_bundle = next_insn == NULL_RTX
9357 || next_insn == tail
9358 || (INSN_P (next_insn)
9359 && recog_memoized (next_insn)
9360 == CODE_FOR_bundle_selector);
9361 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9362 && !start_bundle && !end_bundle
9363 && next_insn
9364 && !unknown_for_bundling_p (next_insn))
9365 num--;
9367 start_bundle = false;
9371 gcc_assert (num == 0);
9373 #endif
9375 free (index_to_bundle_states);
9376 finish_bundle_state_table ();
9377 bundling_p = 0;
9378 dfa_clean_insn_cache ();
9381 /* The following function is called at the end of scheduling BB or
9382 EBB. After reload, it inserts stop bits and does insn bundling. */
9384 static void
9385 ia64_sched_finish (FILE *dump, int sched_verbose)
9387 if (sched_verbose)
9388 fprintf (dump, "// Finishing schedule.\n");
9389 if (!reload_completed)
9390 return;
9391 if (reload_completed)
9393 final_emit_insn_group_barriers (dump);
9394 bundling (dump, sched_verbose, current_sched_info->prev_head,
9395 current_sched_info->next_tail);
9396 if (sched_verbose && dump)
9397 fprintf (dump, "// finishing %d-%d\n",
9398 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9399 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9401 return;
9405 /* The following function inserts stop bits in scheduled BB or EBB. */
9407 static void
9408 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9410 rtx_insn *insn;
9411 int need_barrier_p = 0;
9412 int seen_good_insn = 0;
9414 init_insn_group_barriers ();
9416 for (insn = NEXT_INSN (current_sched_info->prev_head);
9417 insn != current_sched_info->next_tail;
9418 insn = NEXT_INSN (insn))
9420 if (BARRIER_P (insn))
9422 rtx_insn *last = prev_active_insn (insn);
9424 if (! last)
9425 continue;
9426 if (JUMP_TABLE_DATA_P (last))
9427 last = prev_active_insn (last);
9428 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9429 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9431 init_insn_group_barriers ();
9432 seen_good_insn = 0;
9433 need_barrier_p = 0;
9435 else if (NONDEBUG_INSN_P (insn))
9437 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9439 init_insn_group_barriers ();
9440 seen_good_insn = 0;
9441 need_barrier_p = 0;
9443 else if (need_barrier_p || group_barrier_needed (insn)
9444 || (mflag_sched_stop_bits_after_every_cycle
9445 && GET_MODE (insn) == TImode
9446 && seen_good_insn))
9448 if (TARGET_EARLY_STOP_BITS)
9450 rtx_insn *last;
9452 for (last = insn;
9453 last != current_sched_info->prev_head;
9454 last = PREV_INSN (last))
9455 if (INSN_P (last) && GET_MODE (last) == TImode
9456 && stops_p [INSN_UID (last)])
9457 break;
9458 if (last == current_sched_info->prev_head)
9459 last = insn;
9460 last = prev_active_insn (last);
9461 if (last
9462 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9463 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9464 last);
9465 init_insn_group_barriers ();
9466 for (last = NEXT_INSN (last);
9467 last != insn;
9468 last = NEXT_INSN (last))
9469 if (INSN_P (last))
9471 group_barrier_needed (last);
9472 if (recog_memoized (last) >= 0
9473 && important_for_bundling_p (last))
9474 seen_good_insn = 1;
9477 else
9479 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9480 insn);
9481 init_insn_group_barriers ();
9482 seen_good_insn = 0;
9484 group_barrier_needed (insn);
9485 if (recog_memoized (insn) >= 0
9486 && important_for_bundling_p (insn))
9487 seen_good_insn = 1;
9489 else if (recog_memoized (insn) >= 0
9490 && important_for_bundling_p (insn))
9491 seen_good_insn = 1;
9492 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9499 /* The following function returns the lookahead depth used by the
9500 first-cycle multipass DFA insn scheduling. */
9502 static int
9503 ia64_first_cycle_multipass_dfa_lookahead (void)
9505 return (reload_completed ? 6 : 4);
9508 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9510 static void
9511 ia64_init_dfa_pre_cycle_insn (void)
9513 if (temp_dfa_state == NULL)
9515 dfa_state_size = state_size ();
9516 temp_dfa_state = xmalloc (dfa_state_size);
9517 prev_cycle_state = xmalloc (dfa_state_size);
9519 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9520 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9521 recog_memoized (dfa_pre_cycle_insn);
9522 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9523 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9524 recog_memoized (dfa_stop_insn);
9527 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9528 used by the DFA insn scheduler. */
9530 static rtx
9531 ia64_dfa_pre_cycle_insn (void)
9533 return dfa_pre_cycle_insn;
9536 /* The following function returns TRUE if PRODUCER (of type ilog or
9537 ld) produces address for CONSUMER (of type st or stf). */
9540 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9542 rtx dest, reg, mem;
9544 gcc_assert (producer && consumer);
9545 dest = ia64_single_set (producer);
9546 gcc_assert (dest);
9547 reg = SET_DEST (dest);
9548 gcc_assert (reg);
9549 if (GET_CODE (reg) == SUBREG)
9550 reg = SUBREG_REG (reg);
9551 gcc_assert (GET_CODE (reg) == REG);
9553 dest = ia64_single_set (consumer);
9554 gcc_assert (dest);
9555 mem = SET_DEST (dest);
9556 gcc_assert (mem && GET_CODE (mem) == MEM);
9557 return reg_mentioned_p (reg, mem);
9560 /* The following function returns TRUE if PRODUCER (of type ilog or
9561 ld) produces address for CONSUMER (of type ld or fld). */
9564 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9566 rtx dest, src, reg, mem;
9568 gcc_assert (producer && consumer);
9569 dest = ia64_single_set (producer);
9570 gcc_assert (dest);
9571 reg = SET_DEST (dest);
9572 gcc_assert (reg);
9573 if (GET_CODE (reg) == SUBREG)
9574 reg = SUBREG_REG (reg);
9575 gcc_assert (GET_CODE (reg) == REG);
9577 src = ia64_single_set (consumer);
9578 gcc_assert (src);
9579 mem = SET_SRC (src);
9580 gcc_assert (mem);
9582 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9583 mem = XVECEXP (mem, 0, 0);
9584 else if (GET_CODE (mem) == IF_THEN_ELSE)
9585 /* ??? Is this bypass necessary for ld.c? */
9587 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9588 mem = XEXP (mem, 1);
9591 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9592 mem = XEXP (mem, 0);
9594 if (GET_CODE (mem) == UNSPEC)
9596 int c = XINT (mem, 1);
9598 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9599 || c == UNSPEC_LDSA);
9600 mem = XVECEXP (mem, 0, 0);
9603 /* Note that LO_SUM is used for GOT loads. */
9604 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9606 return reg_mentioned_p (reg, mem);
9609 /* The following function returns TRUE if INSN produces an address for a
9610 load/store insn. We will place such insns into an M slot because that
9611 decreases their latency. */
9614 ia64_produce_address_p (rtx insn)
9616 return insn->call;
9620 /* Emit pseudo-ops for the assembler to describe predicate relations.
9621 At present this assumes that we only consider predicate pairs to
9622 be mutex, and that the assembler can deduce proper values from
9623 straight-line code. */
9625 static void
9626 emit_predicate_relation_info (void)
9628 basic_block bb;
9630 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9632 int r;
9633 rtx_insn *head = BB_HEAD (bb);
9635 /* We only need such notes at code labels. */
9636 if (! LABEL_P (head))
9637 continue;
9638 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9639 head = NEXT_INSN (head);
9641 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9642 grabbing the entire block of predicate registers. */
9643 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9644 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9646 rtx p = gen_rtx_REG (BImode, r);
9647 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9648 if (head == BB_END (bb))
9649 BB_END (bb) = n;
9650 head = n;
9654 /* Look for conditional calls that do not return, and protect predicate
9655 relations around them. Otherwise the assembler will assume the call
9656 returns, and complain about uses of call-clobbered predicates after
9657 the call. */
9658 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9660 rtx_insn *insn = BB_HEAD (bb);
9662 while (1)
9664 if (CALL_P (insn)
9665 && GET_CODE (PATTERN (insn)) == COND_EXEC
9666 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9668 rtx_insn *b =
9669 emit_insn_before (gen_safe_across_calls_all (), insn);
9670 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9671 if (BB_HEAD (bb) == insn)
9672 BB_HEAD (bb) = b;
9673 if (BB_END (bb) == insn)
9674 BB_END (bb) = a;
9677 if (insn == BB_END (bb))
9678 break;
9679 insn = NEXT_INSN (insn);
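/* Editorial note (illustrative, hedged): the pred_rel_mutex and
   safe_across_calls patterns emitted above expand to assembler
   annotations along the lines of

     .pred.rel "mutex", p6, p7
     .pred.safe_across_calls p1-p5,p16-p63

   telling the assembler which predicates it may treat as mutually
   exclusive and which are preserved across calls.  The exact spellings
   come from the insn templates in ia64.md, not from this function.  */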
9684 /* Perform machine dependent operations on the rtl chain INSNS. */
9686 static void
9687 ia64_reorg (void)
9689 /* We are freeing block_for_insn in the toplev to keep compatibility
9690 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9691 compute_bb_for_insn ();
9693 /* If optimizing, we'll have split before scheduling. */
9694 if (optimize == 0)
9695 split_all_insns ();
9697 if (optimize && flag_schedule_insns_after_reload
9698 && dbg_cnt (ia64_sched2))
9700 basic_block bb;
9701 timevar_push (TV_SCHED2);
9702 ia64_final_schedule = 1;
9704 /* We can't let modulo-sched prevent us from scheduling any bbs,
9705 since we need the final schedule to produce bundle information. */
9706 FOR_EACH_BB_FN (bb, cfun)
9707 bb->flags &= ~BB_DISABLE_SCHEDULE;
9709 initiate_bundle_states ();
9710 ia64_nop = make_insn_raw (gen_nop ());
9711 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9712 recog_memoized (ia64_nop);
9713 clocks_length = get_max_uid () + 1;
9714 stops_p = XCNEWVEC (char, clocks_length);
9716 if (ia64_tune == PROCESSOR_ITANIUM2)
9718 pos_1 = get_cpu_unit_code ("2_1");
9719 pos_2 = get_cpu_unit_code ("2_2");
9720 pos_3 = get_cpu_unit_code ("2_3");
9721 pos_4 = get_cpu_unit_code ("2_4");
9722 pos_5 = get_cpu_unit_code ("2_5");
9723 pos_6 = get_cpu_unit_code ("2_6");
9724 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9725 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9726 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9727 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9728 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9729 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9730 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9731 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9732 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9733 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9734 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9735 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9736 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9737 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9738 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9739 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9740 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9741 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9742 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9743 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9745 else
9747 pos_1 = get_cpu_unit_code ("1_1");
9748 pos_2 = get_cpu_unit_code ("1_2");
9749 pos_3 = get_cpu_unit_code ("1_3");
9750 pos_4 = get_cpu_unit_code ("1_4");
9751 pos_5 = get_cpu_unit_code ("1_5");
9752 pos_6 = get_cpu_unit_code ("1_6");
9753 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9754 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9755 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9756 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9757 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9758 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9759 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9760 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9761 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9762 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9763 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9764 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9765 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9766 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9767 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9768 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9769 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9770 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9771 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9772 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9775 if (flag_selective_scheduling2
9776 && !maybe_skip_selective_scheduling ())
9777 run_selective_scheduling ();
9778 else
9779 schedule_ebbs ();
9781 /* Redo alignment computation, as it might have gone wrong. */
9782 compute_alignments ();
9784 /* We cannot reuse this one because it has been corrupted by the
9785 evil glat. */
9786 finish_bundle_states ();
9787 free (stops_p);
9788 stops_p = NULL;
9789 emit_insn_group_barriers (dump_file);
9791 ia64_final_schedule = 0;
9792 timevar_pop (TV_SCHED2);
9794 else
9795 emit_all_insn_group_barriers (dump_file);
9797 df_analyze ();
9799 /* A call must not be the last instruction in a function, so that the
9800 return address is still within the function, so that unwinding works
9801 properly. Note that IA-64 differs from dwarf2 on this point. */
9802 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9804 rtx_insn *insn;
9805 int saw_stop = 0;
9807 insn = get_last_insn ();
9808 if (! INSN_P (insn))
9809 insn = prev_active_insn (insn);
9810 if (insn)
9812 /* Skip over insns that expand to nothing. */
9813 while (NONJUMP_INSN_P (insn)
9814 && get_attr_empty (insn) == EMPTY_YES)
9816 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9817 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9818 saw_stop = 1;
9819 insn = prev_active_insn (insn);
9821 if (CALL_P (insn))
9823 if (! saw_stop)
9824 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9825 emit_insn (gen_break_f ());
9826 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
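/* Editorial note (illustrative): when the last active insn of a function
   is a call, the code above appends, schematically,

     ;;           // stop bit, unless one was already present
     break.f 0
     ;;

   so the return address still points inside the function and the target
   unwinder can associate it with the right unwind region.  The exact
   mnemonic is determined by the break_f pattern in ia64.md.  */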
9831 emit_predicate_relation_info ();
9833 if (flag_var_tracking)
9835 timevar_push (TV_VAR_TRACKING);
9836 variable_tracking_main ();
9837 timevar_pop (TV_VAR_TRACKING);
9839 df_finish_pass (false);
9842 /* Return true if REGNO is used by the epilogue. */
9844 int
9845 ia64_epilogue_uses (int regno)
9847 switch (regno)
9849 case R_GR (1):
9850 /* With a call to a function in another module, we will write a new
9851 value to "gp". After returning from such a call, we need to make
9852 sure the function restores the original gp-value, even if the
9853 function itself does not use the gp anymore. */
9854 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9856 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9857 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9858 /* For functions defined with the syscall_linkage attribute, all
9859 input registers are marked as live at all function exits. This
9860 prevents the register allocator from using the input registers,
9861 which in turn makes it possible to restart a system call after
9862 an interrupt without having to save/restore the input registers.
9863 This also prevents kernel data from leaking to application code. */
9864 return lookup_attribute ("syscall_linkage",
9865 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9867 case R_BR (0):
9868 /* Conditional return patterns can't represent the use of `b0' as
9869 the return address, so we force the value live this way. */
9870 return 1;
9872 case AR_PFS_REGNUM:
9873 /* Likewise for ar.pfs, which is used by br.ret. */
9874 return 1;
9876 default:
9877 return 0;
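/* Editorial example (not from the original source): the syscall_linkage
   case above corresponds to declarations such as

     extern long do_syscall (long nr, long arg)
       __attribute__ ((syscall_linkage));

   for which all eight input registers stay live at every function exit,
   so a system call can be restarted after an interrupt without saving
   and restoring them.  */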
9881 /* Return true if REGNO is used by the frame unwinder. */
9883 int
9884 ia64_eh_uses (int regno)
9886 unsigned int r;
9888 if (! reload_completed)
9889 return 0;
9891 if (regno == 0)
9892 return 0;
9894 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9895 if (regno == current_frame_info.r[r]
9896 || regno == emitted_frame_related_regs[r])
9897 return 1;
9899 return 0;
9902 /* Return true if this goes in small data/bss. */
9904 /* ??? We could also support our own long data here: generating movl/add/ld8
9905 instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9906 code faster because there is one less load.  This also applies to incomplete
9907 types, which can't go in sdata/sbss. */
9909 static bool
9910 ia64_in_small_data_p (const_tree exp)
9912 if (TARGET_NO_SDATA)
9913 return false;
9915 /* We want to merge strings, so we never consider them small data. */
9916 if (TREE_CODE (exp) == STRING_CST)
9917 return false;
9919 /* Functions are never small data. */
9920 if (TREE_CODE (exp) == FUNCTION_DECL)
9921 return false;
9923 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9925 const char *section = DECL_SECTION_NAME (exp);
9927 if (strcmp (section, ".sdata") == 0
9928 || strncmp (section, ".sdata.", 7) == 0
9929 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9930 || strcmp (section, ".sbss") == 0
9931 || strncmp (section, ".sbss.", 6) == 0
9932 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9933 return true;
9935 else
9937 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9939 /* If this is an incomplete type with size 0, then we can't put it
9940 in sdata because it might be too big when completed. */
9941 if (size > 0 && size <= ia64_section_threshold)
9942 return true;
9945 return false;
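/* Editorial note (illustrative, hedged): an object that passes this test,
   for example

     static int counter;    (assuming its size is <= ia64_section_threshold)

   lands in .sdata/.sbss and can be addressed gp-relative in a single
   insn, roughly "addl r14 = @gprel(counter), gp", instead of an @ltoff
   GOT-style load followed by the data load.  */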
9948 /* Output assembly directives for prologue regions. */
9950 /* True if the basic block currently being processed is the last one
9951 in the function. */
9952 static bool last_block;
9954 /* True if we need a copy_state command at the start of the next block. */
9956 static bool need_copy_state;
9958 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9959 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9960 #endif
9962 /* The function emits unwind directives for the start of an epilogue. */
9964 static void
9965 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9966 bool unwind, bool frame ATTRIBUTE_UNUSED)
9968 /* If this isn't the last block of the function, then we need to label the
9969 current state, and copy it back in at the start of the next block. */
9971 if (!last_block)
9973 if (unwind)
9974 fprintf (asm_out_file, "\t.label_state %d\n",
9975 ++cfun->machine->state_num);
9976 need_copy_state = true;
9979 if (unwind)
9980 fprintf (asm_out_file, "\t.restore sp\n");
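/* Editorial note (illustrative): for an epilogue in the middle of a
   function the directives emitted here look, schematically, like

     .label_state 1
     .restore sp

   and the matching ".copy_state 1" is emitted at the start of the next
   block by ia64_asm_unwind_emit once need_copy_state has been set.  */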
9983 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9985 static void
9986 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9987 bool unwind, bool frame)
9989 rtx dest = SET_DEST (pat);
9990 rtx src = SET_SRC (pat);
9992 if (dest == stack_pointer_rtx)
9994 if (GET_CODE (src) == PLUS)
9996 rtx op0 = XEXP (src, 0);
9997 rtx op1 = XEXP (src, 1);
9999 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10001 if (INTVAL (op1) < 0)
10003 gcc_assert (!frame_pointer_needed);
10004 if (unwind)
10005 fprintf (asm_out_file,
10006 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10007 -INTVAL (op1));
10009 else
10010 process_epilogue (asm_out_file, insn, unwind, frame);
10012 else
10014 gcc_assert (src == hard_frame_pointer_rtx);
10015 process_epilogue (asm_out_file, insn, unwind, frame);
10018 else if (dest == hard_frame_pointer_rtx)
10020 gcc_assert (src == stack_pointer_rtx);
10021 gcc_assert (frame_pointer_needed);
10023 if (unwind)
10024 fprintf (asm_out_file, "\t.vframe r%d\n",
10025 ia64_dbx_register_number (REGNO (dest)));
10027 else
10028 gcc_unreachable ();
10031 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10033 static void
10034 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10036 rtx dest = SET_DEST (pat);
10037 rtx src = SET_SRC (pat);
10038 int dest_regno = REGNO (dest);
10039 int src_regno;
10041 if (src == pc_rtx)
10043 /* Saving return address pointer. */
10044 if (unwind)
10045 fprintf (asm_out_file, "\t.save rp, r%d\n",
10046 ia64_dbx_register_number (dest_regno));
10047 return;
10050 src_regno = REGNO (src);
10052 switch (src_regno)
10054 case PR_REG (0):
10055 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10056 if (unwind)
10057 fprintf (asm_out_file, "\t.save pr, r%d\n",
10058 ia64_dbx_register_number (dest_regno));
10059 break;
10061 case AR_UNAT_REGNUM:
10062 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10063 if (unwind)
10064 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10065 ia64_dbx_register_number (dest_regno));
10066 break;
10068 case AR_LC_REGNUM:
10069 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10070 if (unwind)
10071 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10072 ia64_dbx_register_number (dest_regno));
10073 break;
10075 default:
10076 /* Everything else should indicate being stored to memory. */
10077 gcc_unreachable ();
10081 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10083 static void
10084 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10086 rtx dest = SET_DEST (pat);
10087 rtx src = SET_SRC (pat);
10088 int src_regno = REGNO (src);
10089 const char *saveop;
10090 HOST_WIDE_INT off;
10091 rtx base;
10093 gcc_assert (MEM_P (dest));
10094 if (GET_CODE (XEXP (dest, 0)) == REG)
10096 base = XEXP (dest, 0);
10097 off = 0;
10099 else
10101 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10102 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10103 base = XEXP (XEXP (dest, 0), 0);
10104 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10107 if (base == hard_frame_pointer_rtx)
10109 saveop = ".savepsp";
10110 off = - off;
10112 else
10114 gcc_assert (base == stack_pointer_rtx);
10115 saveop = ".savesp";
10118 src_regno = REGNO (src);
10119 switch (src_regno)
10121 case BR_REG (0):
10122 gcc_assert (!current_frame_info.r[reg_save_b0]);
10123 if (unwind)
10124 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10125 saveop, off);
10126 break;
10128 case PR_REG (0):
10129 gcc_assert (!current_frame_info.r[reg_save_pr]);
10130 if (unwind)
10131 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10132 saveop, off);
10133 break;
10135 case AR_LC_REGNUM:
10136 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10137 if (unwind)
10138 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10139 saveop, off);
10140 break;
10142 case AR_PFS_REGNUM:
10143 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10144 if (unwind)
10145 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10146 saveop, off);
10147 break;
10149 case AR_UNAT_REGNUM:
10150 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10151 if (unwind)
10152 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10153 saveop, off);
10154 break;
10156 case GR_REG (4):
10157 case GR_REG (5):
10158 case GR_REG (6):
10159 case GR_REG (7):
10160 if (unwind)
10161 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10162 1 << (src_regno - GR_REG (4)));
10163 break;
10165 case BR_REG (1):
10166 case BR_REG (2):
10167 case BR_REG (3):
10168 case BR_REG (4):
10169 case BR_REG (5):
10170 if (unwind)
10171 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10172 1 << (src_regno - BR_REG (1)));
10173 break;
10175 case FR_REG (2):
10176 case FR_REG (3):
10177 case FR_REG (4):
10178 case FR_REG (5):
10179 if (unwind)
10180 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10181 1 << (src_regno - FR_REG (2)));
10182 break;
10184 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10185 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10186 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10187 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10188 if (unwind)
10189 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10190 1 << (src_regno - FR_REG (12)));
10191 break;
10193 default:
10194 /* ??? For some reason we mark other general registers, even those
10195 we can't represent in the unwind info. Ignore them. */
10196 break;
10200 /* This function looks at a single insn and emits any directives
10201 required to unwind this insn. */
10203 static void
10204 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10206 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10207 bool frame = dwarf2out_do_frame ();
10208 rtx note, pat;
10209 bool handled_one;
10211 if (!unwind && !frame)
10212 return;
10214 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10216 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10217 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10219 /* Restore unwind state from immediately before the epilogue. */
10220 if (need_copy_state)
10222 if (unwind)
10224 fprintf (asm_out_file, "\t.body\n");
10225 fprintf (asm_out_file, "\t.copy_state %d\n",
10226 cfun->machine->state_num);
10228 need_copy_state = false;
10232 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10233 return;
10235 /* Look for the ALLOC insn. */
10236 if (INSN_CODE (insn) == CODE_FOR_alloc)
10238 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10239 int dest_regno = REGNO (dest);
10241 /* If this is the final destination for ar.pfs, then this must
10242 be the alloc in the prologue. */
10243 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10245 if (unwind)
10246 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10247 ia64_dbx_register_number (dest_regno));
10249 else
10251 /* This must be an alloc before a sibcall. We must drop the
10252 old frame info. The easiest way to drop the old frame
10253 info is to ensure we had a ".restore sp" directive
10254 followed by a new prologue. If the procedure doesn't
10255 have a memory-stack frame, we'll issue a dummy ".restore
10256 sp" now. */
10257 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10258 /* If we haven't done process_epilogue () yet, do it now. */
10259 process_epilogue (asm_out_file, insn, unwind, frame);
10260 if (unwind)
10261 fprintf (asm_out_file, "\t.prologue\n");
10263 return;
10266 handled_one = false;
10267 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10268 switch (REG_NOTE_KIND (note))
10270 case REG_CFA_ADJUST_CFA:
10271 pat = XEXP (note, 0);
10272 if (pat == NULL)
10273 pat = PATTERN (insn);
10274 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10275 handled_one = true;
10276 break;
10278 case REG_CFA_OFFSET:
10279 pat = XEXP (note, 0);
10280 if (pat == NULL)
10281 pat = PATTERN (insn);
10282 process_cfa_offset (asm_out_file, pat, unwind);
10283 handled_one = true;
10284 break;
10286 case REG_CFA_REGISTER:
10287 pat = XEXP (note, 0);
10288 if (pat == NULL)
10289 pat = PATTERN (insn);
10290 process_cfa_register (asm_out_file, pat, unwind);
10291 handled_one = true;
10292 break;
10294 case REG_FRAME_RELATED_EXPR:
10295 case REG_CFA_DEF_CFA:
10296 case REG_CFA_EXPRESSION:
10297 case REG_CFA_RESTORE:
10298 case REG_CFA_SET_VDRAP:
10299 /* Not used in the ia64 port. */
10300 gcc_unreachable ();
10302 default:
10303 /* Not a frame-related note. */
10304 break;
10307 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10308 explicit action to take. No guessing required. */
10309 gcc_assert (handled_one);
10312 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10314 static void
10315 ia64_asm_emit_except_personality (rtx personality)
10317 fputs ("\t.personality\t", asm_out_file);
10318 output_addr_const (asm_out_file, personality);
10319 fputc ('\n', asm_out_file);
10322 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10324 static void
10325 ia64_asm_init_sections (void)
10327 exception_section = get_unnamed_section (0, output_section_asm_op,
10328 "\t.handlerdata");
10331 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10333 static enum unwind_info_type
10334 ia64_debug_unwind_info (void)
10336 return UI_TARGET;
10339 enum ia64_builtins
10341 IA64_BUILTIN_BSP,
10342 IA64_BUILTIN_COPYSIGNQ,
10343 IA64_BUILTIN_FABSQ,
10344 IA64_BUILTIN_FLUSHRS,
10345 IA64_BUILTIN_INFQ,
10346 IA64_BUILTIN_HUGE_VALQ,
10347 IA64_BUILTIN_max
10350 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10352 void
10353 ia64_init_builtins (void)
10355 tree fpreg_type;
10356 tree float80_type;
10357 tree decl;
10359 /* The __fpreg type. */
10360 fpreg_type = make_node (REAL_TYPE);
10361 TYPE_PRECISION (fpreg_type) = 82;
10362 layout_type (fpreg_type);
10363 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10365 /* The __float80 type. */
10366 float80_type = make_node (REAL_TYPE);
10367 TYPE_PRECISION (float80_type) = 80;
10368 layout_type (float80_type);
10369 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10371 /* The __float128 type. */
10372 if (!TARGET_HPUX)
10374 tree ftype;
10375 tree float128_type = make_node (REAL_TYPE);
10377 TYPE_PRECISION (float128_type) = 128;
10378 layout_type (float128_type);
10379 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10381 /* TFmode support builtins. */
10382 ftype = build_function_type_list (float128_type, NULL_TREE);
10383 decl = add_builtin_function ("__builtin_infq", ftype,
10384 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10385 NULL, NULL_TREE);
10386 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10388 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10389 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10390 NULL, NULL_TREE);
10391 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10393 ftype = build_function_type_list (float128_type,
10394 float128_type,
10395 NULL_TREE);
10396 decl = add_builtin_function ("__builtin_fabsq", ftype,
10397 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10398 "__fabstf2", NULL_TREE);
10399 TREE_READONLY (decl) = 1;
10400 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10402 ftype = build_function_type_list (float128_type,
10403 float128_type,
10404 float128_type,
10405 NULL_TREE);
10406 decl = add_builtin_function ("__builtin_copysignq", ftype,
10407 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10408 "__copysigntf3", NULL_TREE);
10409 TREE_READONLY (decl) = 1;
10410 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10412 else
10413 /* Under HPUX, this is a synonym for "long double". */
10414 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10415 "__float128");
10417 /* Fwrite on VMS is non-standard. */
10418 #if TARGET_ABI_OPEN_VMS
10419 vms_patch_builtins ();
10420 #endif
10422 #define def_builtin(name, type, code) \
10423 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10424 NULL, NULL_TREE)
10426 decl = def_builtin ("__builtin_ia64_bsp",
10427 build_function_type_list (ptr_type_node, NULL_TREE),
10428 IA64_BUILTIN_BSP);
10429 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10431 decl = def_builtin ("__builtin_ia64_flushrs",
10432 build_function_type_list (void_type_node, NULL_TREE),
10433 IA64_BUILTIN_FLUSHRS);
10434 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10436 #undef def_builtin
10438 if (TARGET_HPUX)
10440 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10441 set_user_assembler_name (decl, "_Isfinite");
10442 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10443 set_user_assembler_name (decl, "_Isfinitef");
10444 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10445 set_user_assembler_name (decl, "_Isfinitef128");
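/* Editorial usage sketch (hedged, not part of the original source): the
   machine-dependent builtins registered above are callable from C along
   these lines:

     void *bsp = __builtin_ia64_bsp ();       // RSE backing store pointer
     __builtin_ia64_flushrs ();               // flush register stack
     __float128 a = __builtin_fabsq (-1.0q);  // only when !TARGET_HPUX

   The __float128 group exists only in the !TARGET_HPUX branch above; on
   HP-UX __float128 is just another name for long double.  */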
10449 rtx
10450 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10451 machine_mode mode ATTRIBUTE_UNUSED,
10452 int ignore ATTRIBUTE_UNUSED)
10454 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10455 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10457 switch (fcode)
10459 case IA64_BUILTIN_BSP:
10460 if (! target || ! register_operand (target, DImode))
10461 target = gen_reg_rtx (DImode);
10462 emit_insn (gen_bsp_value (target));
10463 #ifdef POINTERS_EXTEND_UNSIGNED
10464 target = convert_memory_address (ptr_mode, target);
10465 #endif
10466 return target;
10468 case IA64_BUILTIN_FLUSHRS:
10469 emit_insn (gen_flushrs ());
10470 return const0_rtx;
10472 case IA64_BUILTIN_INFQ:
10473 case IA64_BUILTIN_HUGE_VALQ:
10475 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10476 REAL_VALUE_TYPE inf;
10477 rtx tmp;
10479 real_inf (&inf);
10480 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10482 tmp = validize_mem (force_const_mem (target_mode, tmp));
10484 if (target == 0)
10485 target = gen_reg_rtx (target_mode);
10487 emit_move_insn (target, tmp);
10488 return target;
10491 case IA64_BUILTIN_FABSQ:
10492 case IA64_BUILTIN_COPYSIGNQ:
10493 return expand_call (exp, target, ignore);
10495 default:
10496 gcc_unreachable ();
10499 return NULL_RTX;
10502 /* Return the ia64 builtin for CODE. */
10504 static tree
10505 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10507 if (code >= IA64_BUILTIN_max)
10508 return error_mark_node;
10510 return ia64_builtins[code];
10513 /* For HP-UX IA64, aggregate parameters are passed stored in the
10514 most significant bits of the stack slot. */
10516 enum direction
10517 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10519 /* Exception to normal case for structures/unions/etc. */
10521 if (type && AGGREGATE_TYPE_P (type)
10522 && int_size_in_bytes (type) < UNITS_PER_WORD)
10523 return upward;
10525 /* Fall back to the default. */
10526 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
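/* Editorial example (illustrative): with this hook, a small aggregate
   such as

     struct s3 { char a, b, c; };   // 3 bytes, smaller than UNITS_PER_WORD

   is padded upward on HP-UX, i.e. its bytes occupy the most significant
   end of the 8-byte argument slot, while scalar arguments keep the
   default padding.  */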
10529 /* Emit text to declare externally defined variables and functions, because
10530 the Intel assembler does not support undefined externals. */
10532 void
10533 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10535 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10536 set in order to avoid putting out names that are never really
10537 used. */
10538 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10540 /* maybe_assemble_visibility will return 1 if the assembler
10541 visibility directive is output. */
10542 int need_visibility = ((*targetm.binds_local_p) (decl)
10543 && maybe_assemble_visibility (decl));
10545 /* GNU as does not need anything here, but the HP linker does
10546 need something for external functions. */
10547 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10548 && TREE_CODE (decl) == FUNCTION_DECL)
10549 (*targetm.asm_out.globalize_decl_name) (file, decl);
10550 else if (need_visibility && !TARGET_GNU_AS)
10551 (*targetm.asm_out.globalize_label) (file, name);
10555 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10556 modes of word_mode and larger.  Rename the TFmode libfuncs using the
10557 HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
10558 backward compatibility. */
10560 static void
10561 ia64_init_libfuncs (void)
10563 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10564 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10565 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10566 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10568 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10569 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10570 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10571 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10572 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10574 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10575 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10576 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10577 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10578 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10579 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10581 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10582 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10583 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10584 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10585 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10587 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10588 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10589 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10590 /* HP-UX 11.23 libc does not have a function for unsigned
10591 SImode-to-TFmode conversion. */
10592 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10595 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10597 static void
10598 ia64_hpux_init_libfuncs (void)
10600 ia64_init_libfuncs ();
10602 /* The HP SI millicode division and mod functions expect DI arguments.
10603 By turning them off completely we avoid using both libgcc and the
10604 non-standard millicode routines and use the HP DI millicode routines
10605 instead. */
10607 set_optab_libfunc (sdiv_optab, SImode, 0);
10608 set_optab_libfunc (udiv_optab, SImode, 0);
10609 set_optab_libfunc (smod_optab, SImode, 0);
10610 set_optab_libfunc (umod_optab, SImode, 0);
10612 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10613 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10614 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10615 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10617 /* HP-UX libc has TF min/max/abs routines in it. */
10618 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10619 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10620 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10622 /* ia64_expand_compare uses this. */
10623 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10625 /* These should never be used. */
10626 set_optab_libfunc (eq_optab, TFmode, 0);
10627 set_optab_libfunc (ne_optab, TFmode, 0);
10628 set_optab_libfunc (gt_optab, TFmode, 0);
10629 set_optab_libfunc (ge_optab, TFmode, 0);
10630 set_optab_libfunc (lt_optab, TFmode, 0);
10631 set_optab_libfunc (le_optab, TFmode, 0);
10634 /* Rename the division and modulus functions in VMS. */
10636 static void
10637 ia64_vms_init_libfuncs (void)
10639 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10640 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10641 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10642 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10643 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10644 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10645 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10646 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10647 abort_libfunc = init_one_libfunc ("decc$abort");
10648 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10649 #ifdef MEM_LIBFUNCS_INIT
10650 MEM_LIBFUNCS_INIT;
10651 #endif
10654 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10655 the HPUX conventions. */
10657 static void
10658 ia64_sysv4_init_libfuncs (void)
10660 ia64_init_libfuncs ();
10662 /* These functions are not part of the HPUX TFmode interface. We
10663 use them instead of _U_Qfcmp, which doesn't work the way we
10664 expect. */
10665 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10666 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10667 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10668 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10669 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10670 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10672 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10673 glibc doesn't have them. */
10676 /* Use soft-fp. */
10678 static void
10679 ia64_soft_fp_init_libfuncs (void)
10683 static bool
10684 ia64_vms_valid_pointer_mode (machine_mode mode)
10686 return (mode == SImode || mode == DImode);
10689 /* For HPUX, it is illegal to have relocations in shared segments. */
10691 static int
10692 ia64_hpux_reloc_rw_mask (void)
10694 return 3;
10697 /* For others, relax this so that relocations to local data go in
10698 read-only segments, but we still cannot allow global relocations
10699 in read-only segments. */
10701 static int
10702 ia64_reloc_rw_mask (void)
10704 return flag_pic ? 3 : 2;
10707 /* Return the section to use for X. The only special thing we do here
10708 is to honor small data. */
10710 static section *
10711 ia64_select_rtx_section (machine_mode mode, rtx x,
10712 unsigned HOST_WIDE_INT align)
10714 if (GET_MODE_SIZE (mode) > 0
10715 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10716 && !TARGET_NO_SDATA)
10717 return sdata_section;
10718 else
10719 return default_elf_select_rtx_section (mode, x, align);
10722 static unsigned int
10723 ia64_section_type_flags (tree decl, const char *name, int reloc)
10725 unsigned int flags = 0;
10727 if (strcmp (name, ".sdata") == 0
10728 || strncmp (name, ".sdata.", 7) == 0
10729 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10730 || strncmp (name, ".sdata2.", 8) == 0
10731 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10732 || strcmp (name, ".sbss") == 0
10733 || strncmp (name, ".sbss.", 6) == 0
10734 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10735 flags = SECTION_SMALL;
10737 flags |= default_section_type_flags (decl, name, reloc);
10738 return flags;
10741 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10742 structure type and that the address of that type should be passed
10743 in out0, rather than in r8. */
10745 static bool
10746 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10748 tree ret_type = TREE_TYPE (fntype);
10750 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10751 as the structure return address parameter, if the return value
10752 type has a non-trivial copy constructor or destructor. It is not
10753 clear if this same convention should be used for other
10754 programming languages. Until G++ 3.4, we incorrectly used r8 for
10755 these return values. */
10756 return (abi_version_at_least (2)
10757 && ret_type
10758 && TYPE_MODE (ret_type) == BLKmode
10759 && TREE_ADDRESSABLE (ret_type)
10760 && lang_GNU_CXX ());
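/* Editorial example (illustrative, C++): a return type with a
   non-trivial destructor, e.g.

     struct T { ~T (); int x[4]; };
     T make ();

   is BLKmode and TREE_ADDRESSABLE, so with -fabi-version >= 2 the
   address of the return slot for make() is passed in out0 rather than
   in r8.  */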
10763 /* Output the assembler code for a thunk function. THUNK_DECL is the
10764 declaration for the thunk function itself, FUNCTION is the decl for
10765 the target function. DELTA is an immediate constant offset to be
10766 added to THIS. If VCALL_OFFSET is nonzero, the word at
10767 *(*this + vcall_offset) should be added to THIS. */
10769 static void
10770 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10771 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10772 tree function)
10774 rtx this_rtx, funexp;
10775 rtx_insn *insn;
10776 unsigned int this_parmno;
10777 unsigned int this_regno;
10778 rtx delta_rtx;
10780 reload_completed = 1;
10781 epilogue_completed = 1;
10783 /* Set things up as ia64_expand_prologue might. */
10784 last_scratch_gr_reg = 15;
10786 memset (&current_frame_info, 0, sizeof (current_frame_info));
10787 current_frame_info.spill_cfa_off = -16;
10788 current_frame_info.n_input_regs = 1;
10789 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10791 /* Mark the end of the (empty) prologue. */
10792 emit_note (NOTE_INSN_PROLOGUE_END);
10794 /* Figure out whether "this" will be the first parameter (the
10795 typical case) or the second parameter (as happens when the
10796 virtual function returns certain class objects). */
10797 this_parmno
10798 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10799 ? 1 : 0);
10800 this_regno = IN_REG (this_parmno);
10801 if (!TARGET_REG_NAMES)
10802 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10804 this_rtx = gen_rtx_REG (Pmode, this_regno);
10806 /* Apply the constant offset, if required. */
10807 delta_rtx = GEN_INT (delta);
10808 if (TARGET_ILP32)
10810 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10811 REG_POINTER (tmp) = 1;
10812 if (delta && satisfies_constraint_I (delta_rtx))
10814 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10815 delta = 0;
10817 else
10818 emit_insn (gen_ptr_extend (this_rtx, tmp));
10820 if (delta)
10822 if (!satisfies_constraint_I (delta_rtx))
10824 rtx tmp = gen_rtx_REG (Pmode, 2);
10825 emit_move_insn (tmp, delta_rtx);
10826 delta_rtx = tmp;
10828 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10831 /* Apply the offset from the vtable, if required. */
10832 if (vcall_offset)
10834 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10835 rtx tmp = gen_rtx_REG (Pmode, 2);
10837 if (TARGET_ILP32)
10839 rtx t = gen_rtx_REG (ptr_mode, 2);
10840 REG_POINTER (t) = 1;
10841 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10842 if (satisfies_constraint_I (vcall_offset_rtx))
10844 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10845 vcall_offset = 0;
10847 else
10848 emit_insn (gen_ptr_extend (tmp, t));
10850 else
10851 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10853 if (vcall_offset)
10855 if (!satisfies_constraint_J (vcall_offset_rtx))
10857 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10858 emit_move_insn (tmp2, vcall_offset_rtx);
10859 vcall_offset_rtx = tmp2;
10861 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10864 if (TARGET_ILP32)
10865 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10866 else
10867 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10869 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10872 /* Generate a tail call to the target function. */
10873 if (! TREE_USED (function))
10875 assemble_external (function);
10876 TREE_USED (function) = 1;
10878 funexp = XEXP (DECL_RTL (function), 0);
10879 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10880 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10881 insn = get_last_insn ();
10882 SIBLING_CALL_P (insn) = 1;
10884 /* Code generation for calls relies on splitting. */
10885 reload_completed = 1;
10886 epilogue_completed = 1;
10887 try_split (PATTERN (insn), insn, 0);
10889 emit_barrier ();
10891 /* Run just enough of rest_of_compilation to get the insns emitted.
10892 There's not really enough bulk here to make other passes such as
10893 instruction scheduling worthwhile.  Note that use_thunk calls
10894 assemble_start_function and assemble_end_function. */
10896 emit_all_insn_group_barriers (NULL);
10897 insn = get_insns ();
10898 shorten_branches (insn);
10899 final_start_function (insn, file, 1);
10900 final (insn, file, 1);
10901 final_end_function ();
10903 reload_completed = 0;
10904 epilogue_completed = 0;
10907 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10909 static rtx
10910 ia64_struct_value_rtx (tree fntype,
10911 int incoming ATTRIBUTE_UNUSED)
10913 if (TARGET_ABI_OPEN_VMS ||
10914 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10915 return NULL_RTX;
10916 return gen_rtx_REG (Pmode, GR_REG (8));
10919 static bool
10920 ia64_scalar_mode_supported_p (machine_mode mode)
10922 switch (mode)
10924 case QImode:
10925 case HImode:
10926 case SImode:
10927 case DImode:
10928 case TImode:
10929 return true;
10931 case SFmode:
10932 case DFmode:
10933 case XFmode:
10934 case RFmode:
10935 return true;
10937 case TFmode:
10938 return true;
10940 default:
10941 return false;
10945 static bool
10946 ia64_vector_mode_supported_p (machine_mode mode)
10948 switch (mode)
10950 case V8QImode:
10951 case V4HImode:
10952 case V2SImode:
10953 return true;
10955 case V2SFmode:
10956 return true;
10958 default:
10959 return false;
10963 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P. */
10965 static bool
10966 ia64_libgcc_floating_mode_supported_p (machine_mode mode)
10968 switch (mode)
10970 case SFmode:
10971 case DFmode:
10972 return true;
10974 case XFmode:
10975 #ifdef IA64_NO_LIBGCC_XFMODE
10976 return false;
10977 #else
10978 return true;
10979 #endif
10981 case TFmode:
10982 #ifdef IA64_NO_LIBGCC_TFMODE
10983 return false;
10984 #else
10985 return true;
10986 #endif
10988 default:
10989 return false;
10993 /* Implement the FUNCTION_PROFILER macro. */
10995 void
10996 ia64_output_function_profiler (FILE *file, int labelno)
10998 bool indirect_call;
11000 /* If the function needs a static chain and the static chain
11001 register is r15, we use an indirect call so as to bypass
11002 the PLT stub in case the executable is dynamically linked,
11003 because the stub clobbers r15 as per 5.3.6 of the psABI.
11004 We don't need to do that in non-canonical PIC mode. */
11006 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11008 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11009 indirect_call = true;
11011 else
11012 indirect_call = false;
11014 if (TARGET_GNU_AS)
11015 fputs ("\t.prologue 4, r40\n", file);
11016 else
11017 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11018 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11020 if (NO_PROFILE_COUNTERS)
11021 fputs ("\tmov out3 = r0\n", file);
11022 else
11024 char buf[20];
11025 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11027 if (TARGET_AUTO_PIC)
11028 fputs ("\tmovl out3 = @gprel(", file);
11029 else
11030 fputs ("\taddl out3 = @ltoff(", file);
11031 assemble_name (file, buf);
11032 if (TARGET_AUTO_PIC)
11033 fputs (")\n", file);
11034 else
11035 fputs ("), r1\n", file);
11038 if (indirect_call)
11039 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11040 fputs ("\t;;\n", file);
11042 fputs ("\t.save rp, r42\n", file);
11043 fputs ("\tmov out2 = b0\n", file);
11044 if (indirect_call)
11045 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11046 fputs ("\t.body\n", file);
11047 fputs ("\tmov out1 = r1\n", file);
11048 if (indirect_call)
11050 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11051 fputs ("\tmov b6 = r16\n", file);
11052 fputs ("\tld8 r1 = [r14]\n", file);
11053 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11055 else
11056 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11059 static GTY(()) rtx mcount_func_rtx;
11060 static rtx
11061 gen_mcount_func_rtx (void)
11063 if (!mcount_func_rtx)
11064 mcount_func_rtx = init_one_libfunc ("_mcount");
11065 return mcount_func_rtx;
11068 void
11069 ia64_profile_hook (int labelno)
11071 rtx label, ip;
11073 if (NO_PROFILE_COUNTERS)
11074 label = const0_rtx;
11075 else
11077 char buf[30];
11078 const char *label_name;
11079 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11080 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11081 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11082 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11084 ip = gen_reg_rtx (Pmode);
11085 emit_insn (gen_ip_value (ip));
11086 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11087 VOIDmode, 3,
11088 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11089 ip, Pmode,
11090 label, Pmode);
11093 /* Return the mangling of TYPE if it is an extended fundamental type. */
11095 static const char *
11096 ia64_mangle_type (const_tree type)
11098 type = TYPE_MAIN_VARIANT (type);
11100 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11101 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11102 return NULL;
11104 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11105 mangled as "e". */
11106 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11107 return "g";
11108 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11109 an extended mangling. Elsewhere, "e" is available since long
11110 double is 80 bits. */
11111 if (TYPE_MODE (type) == XFmode)
11112 return TARGET_HPUX ? "u9__float80" : "e";
11113 if (TYPE_MODE (type) == RFmode)
11114 return "u7__fpreg";
11115 return NULL;
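/* Editorial examples (derived from the code above, hedged): on HP-UX,
   "void f (__float80)" mangles as _Z1fu9__float80 while elsewhere it is
   _Z1fe; "void g (__fpreg)" mangles as _Z1gu7__fpreg; and TFmode
   __float128 gets the "g" mangling only where it is distinct from long
   double.  */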
11118 /* Return the diagnostic message string if conversion from FROMTYPE to
11119 TOTYPE is not allowed, NULL otherwise. */
11120 static const char *
11121 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11123 /* Reject nontrivial conversion to or from __fpreg. */
11124 if (TYPE_MODE (fromtype) == RFmode
11125 && TYPE_MODE (totype) != RFmode
11126 && TYPE_MODE (totype) != VOIDmode)
11127 return N_("invalid conversion from %<__fpreg%>");
11128 if (TYPE_MODE (totype) == RFmode
11129 && TYPE_MODE (fromtype) != RFmode)
11130 return N_("invalid conversion to %<__fpreg%>");
11131 return NULL;
11134 /* Return the diagnostic message string if the unary operation OP is
11135 not permitted on TYPE, NULL otherwise. */
11136 static const char *
11137 ia64_invalid_unary_op (int op, const_tree type)
11139 /* Reject operations on __fpreg other than unary + or &. */
11140 if (TYPE_MODE (type) == RFmode
11141 && op != CONVERT_EXPR
11142 && op != ADDR_EXPR)
11143 return N_("invalid operation on %<__fpreg%>");
11144 return NULL;
11147 /* Return the diagnostic message string if the binary operation OP is
11148 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11149 static const char *
11150 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11152 /* Reject operations on __fpreg. */
11153 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11154 return N_("invalid operation on %<__fpreg%>");
11155 return NULL;
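/* Editorial example (illustrative): given "__fpreg f;", an expression
   such as "f + 1.0" is rejected with "invalid operation on __fpreg" and
   "(double) f" with "invalid conversion from __fpreg"; per the hooks
   above, only unary '+' and taking the address of a __fpreg object are
   accepted.  */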
11158 /* HP-UX version_id attribute.
11159 For object foo, if the version_id is set to 1234, put out an alias
11160 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11161 other than an alias statement because it is an illegal symbol name. */
11163 static tree
11164 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11165 tree name ATTRIBUTE_UNUSED,
11166 tree args,
11167 int flags ATTRIBUTE_UNUSED,
11168 bool *no_add_attrs)
11170 tree arg = TREE_VALUE (args);
11172 if (TREE_CODE (arg) != STRING_CST)
11174 error ("version attribute is not a string");
11175 *no_add_attrs = true;
11176 return NULL_TREE;
11178 return NULL_TREE;
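/* Editorial usage sketch (hedged): on HP-UX one writes, for example,

     extern int foo (void) __attribute__ ((version_id ("20040821")));

   and references to foo then go through an alias of the form
   .alias foo "foo{20040821}" as described in the comment above; the
   handler itself only validates that the argument is a string.  */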
11181 /* Target hook for c_mode_for_suffix. */
11183 static machine_mode
11184 ia64_c_mode_for_suffix (char suffix)
11186 if (suffix == 'q')
11187 return TFmode;
11188 if (suffix == 'w')
11189 return XFmode;
11191 return VOIDmode;
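/* Editorial usage sketch (illustrative): this hook enables the constant
   suffixes

     __float128 q = 1.0q;   // 'q' selects TFmode
     __float80  w = 1.0w;   // 'w' selects XFmode

   matching the builtin types registered in ia64_init_builtins.  */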
11194 static GTY(()) rtx ia64_dconst_0_5_rtx;
11196 rtx
11197 ia64_dconst_0_5 (void)
11199 if (! ia64_dconst_0_5_rtx)
11201 REAL_VALUE_TYPE rv;
11202 real_from_string (&rv, "0.5");
11203 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11205 return ia64_dconst_0_5_rtx;
11208 static GTY(()) rtx ia64_dconst_0_375_rtx;
11210 rtx
11211 ia64_dconst_0_375 (void)
11213 if (! ia64_dconst_0_375_rtx)
11215 REAL_VALUE_TYPE rv;
11216 real_from_string (&rv, "0.375");
11217 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11219 return ia64_dconst_0_375_rtx;
11222 static machine_mode
11223 ia64_get_reg_raw_mode (int regno)
11225 if (FR_REGNO_P (regno))
11226 return XFmode;
11227 return default_get_reg_raw_mode(regno);
11230 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11231 anymore. */
11233 bool
11234 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11236 return TARGET_HPUX && mode == TFmode;
11239 /* Always default to .text section until HP-UX linker is fixed. */
11241 ATTRIBUTE_UNUSED static section *
11242 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11243 enum node_frequency freq ATTRIBUTE_UNUSED,
11244 bool startup ATTRIBUTE_UNUSED,
11245 bool exit ATTRIBUTE_UNUSED)
11247 return NULL;
11250 /* Construct (set target (vec_select op0 (parallel perm))) and
11251 return true if that's a valid instruction in the active ISA. */
11253 static bool
11254 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11256 rtx rperm[MAX_VECT_LEN], x;
11257 unsigned i;
11259 for (i = 0; i < nelt; ++i)
11260 rperm[i] = GEN_INT (perm[i]);
11262 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11263 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11264 x = gen_rtx_SET (target, x);
11266 rtx_insn *insn = emit_insn (x);
11267 if (recog_memoized (insn) < 0)
11269 remove_insn (insn);
11270 return false;
11272 return true;
11275 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11277 static bool
11278 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11279 const unsigned char *perm, unsigned nelt)
11281 machine_mode v2mode;
11282 rtx x;
11284 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11285 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11286 return expand_vselect (target, x, perm, nelt);
11289 /* Try to expand a no-op permutation. */
11291 static bool
11292 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11294 unsigned i, nelt = d->nelt;
11296 for (i = 0; i < nelt; ++i)
11297 if (d->perm[i] != i)
11298 return false;
11300 if (!d->testing_p)
11301 emit_move_insn (d->target, d->op0);
11303 return true;
11306 /* Try to expand D via a shrp instruction. */
11308 static bool
11309 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11311 unsigned i, nelt = d->nelt, shift, mask;
11312 rtx tmp, hi, lo;
11314 /* ??? Don't force V2SFmode into the integer registers. */
11315 if (d->vmode == V2SFmode)
11316 return false;
11318 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11320 shift = d->perm[0];
11321 if (BYTES_BIG_ENDIAN && shift > nelt)
11322 return false;
11324 for (i = 1; i < nelt; ++i)
11325 if (d->perm[i] != ((shift + i) & mask))
11326 return false;
11328 if (d->testing_p)
11329 return true;
11331 hi = shift < nelt ? d->op1 : d->op0;
11332 lo = shift < nelt ? d->op0 : d->op1;
11334 shift %= nelt;
11336 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11338 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11339 gcc_assert (IN_RANGE (shift, 1, 63));
11341 /* Recall that big-endian elements are numbered starting at the top of
11342 the register. Ideally we'd have a shift-left-pair. But since we
11343 don't, convert to a shift the other direction. */
11344 if (BYTES_BIG_ENDIAN)
11345 shift = 64 - shift;
11347 tmp = gen_reg_rtx (DImode);
11348 hi = gen_lowpart (DImode, hi);
11349 lo = gen_lowpart (DImode, lo);
11350 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11352 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11353 return true;
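/* Editorial worked example (hedged): for a two-operand V8QImode
   permutation with perm = {3,4,5,6,7,8,9,10}, shift is 3 and the whole
   permutation reduces to one 64-bit funnel shift of the concatenated
   operands, roughly

     shrp rT = rHI, rLO, 24    // 3 elements * 8 bits

   with rHI/rLO chosen from op1/op0 as in the code above (and the shift
   mirrored for big-endian).  */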
11356 /* Try to instantiate D in a single instruction. */
11358 static bool
11359 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11361 unsigned i, nelt = d->nelt;
11362 unsigned char perm2[MAX_VECT_LEN];
11364 /* Try single-operand selections. */
11365 if (d->one_operand_p)
11367 if (expand_vec_perm_identity (d))
11368 return true;
11369 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11370 return true;
11373 /* Try two operand selections. */
11374 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11375 return true;
11377 /* Recognize interleave style patterns with reversed operands. */
11378 if (!d->one_operand_p)
11380 for (i = 0; i < nelt; ++i)
11382 unsigned e = d->perm[i];
11383 if (e >= nelt)
11384 e -= nelt;
11385 else
11386 e += nelt;
11387 perm2[i] = e;
11390 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11391 return true;
11394 if (expand_vec_perm_shrp (d))
11395 return true;
11397 /* ??? Look for deposit-like permutations where most of the result
11398 comes from one vector unchanged and the rest comes from a
11399 sequential hunk of the other vector. */
11401 return false;
11404 /* Pattern match broadcast permutations. */
11406 static bool
11407 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11409 unsigned i, elt, nelt = d->nelt;
11410 unsigned char perm2[2];
11411 rtx temp;
11412 bool ok;
11414 if (!d->one_operand_p)
11415 return false;
11417 elt = d->perm[0];
11418 for (i = 1; i < nelt; ++i)
11419 if (d->perm[i] != elt)
11420 return false;
11422 switch (d->vmode)
11424 case V2SImode:
11425 case V2SFmode:
11426 /* Implementable by interleave. */
11427 perm2[0] = elt;
11428 perm2[1] = elt + 2;
11429 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11430 gcc_assert (ok);
11431 break;
11433 case V8QImode:
11434 /* Implementable by extract + broadcast. */
11435 if (BYTES_BIG_ENDIAN)
11436 elt = 7 - elt;
11437 elt *= BITS_PER_UNIT;
11438 temp = gen_reg_rtx (DImode);
11439 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11440 GEN_INT (8), GEN_INT (elt)));
11441 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11442 break;
11444 case V4HImode:
11445 /* Should have been matched directly by vec_select. */
11446 default:
11447 gcc_unreachable ();
11450 return true;
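/* Editorial worked example (hedged): broadcasting element 2 of a
   V8QImode vector extracts that byte into a general register and then
   replicates it with the mux1 broadcast form, roughly

     extr.u rT = rOP, 16, 8
     mux1   rD = rT, @brcst

   while V2SI/V2SF broadcasts are handled by the self-interleave case
   above.  */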
11453 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11454 two vector permutation into a single vector permutation by using
11455 an interleave operation to merge the vectors. */
11457 static bool
11458 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11460 struct expand_vec_perm_d dremap, dfinal;
11461 unsigned char remap[2 * MAX_VECT_LEN];
11462 unsigned contents, i, nelt, nelt2;
11463 unsigned h0, h1, h2, h3;
11464 rtx_insn *seq;
11465 bool ok;
11467 if (d->one_operand_p)
11468 return false;
11470 nelt = d->nelt;
11471 nelt2 = nelt / 2;
11473 /* Examine from whence the elements come. */
11474 contents = 0;
11475 for (i = 0; i < nelt; ++i)
11476 contents |= 1u << d->perm[i];
11478 memset (remap, 0xff, sizeof (remap));
11479 dremap = *d;
11481 h0 = (1u << nelt2) - 1;
11482 h1 = h0 << nelt2;
11483 h2 = h0 << nelt;
11484 h3 = h0 << (nelt + nelt2);
11486 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11488 for (i = 0; i < nelt; ++i)
11490 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11491 remap[which] = i;
11492 dremap.perm[i] = which;
11495 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11497 for (i = 0; i < nelt; ++i)
11499 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11500 remap[which] = i;
11501 dremap.perm[i] = which;
11504 else if ((contents & 0x5555) == contents) /* mix even elements */
11506 for (i = 0; i < nelt; ++i)
11508 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11509 remap[which] = i;
11510 dremap.perm[i] = which;
11513 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11515 for (i = 0; i < nelt; ++i)
11517 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11518 remap[which] = i;
11519 dremap.perm[i] = which;
11522 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11524 unsigned shift = ctz_hwi (contents);
11525 for (i = 0; i < nelt; ++i)
11527 unsigned which = (i + shift) & (2 * nelt - 1);
11528 remap[which] = i;
11529 dremap.perm[i] = which;
11532 else
11533 return false;
11535 /* Use the remapping array set up above to move the elements from their
11536 swizzled locations into their final destinations. */
11537 dfinal = *d;
11538 for (i = 0; i < nelt; ++i)
11540 unsigned e = remap[d->perm[i]];
11541 gcc_assert (e < nelt);
11542 dfinal.perm[i] = e;
11544 if (d->testing_p)
11545 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11546 else
11547 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11548 dfinal.op1 = dfinal.op0;
11549 dfinal.one_operand_p = true;
11550 dremap.target = dfinal.op0;
11552 /* Test if the final remap can be done with a single insn. For V4HImode
11553 this *will* succeed. For V8QImode or V2SImode it may not. */
11554 start_sequence ();
11555 ok = expand_vec_perm_1 (&dfinal);
11556 seq = get_insns ();
11557 end_sequence ();
11558 if (!ok)
11559 return false;
11560 if (d->testing_p)
11561 return true;
11563 ok = expand_vec_perm_1 (&dremap);
11564 gcc_assert (ok);
11566 emit_insn (seq);
11567 return true;
11570 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11571 constant permutation via two mux2 and a merge. */
11573 static bool
11574 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11576 unsigned char perm2[4];
11577 rtx rmask[4];
11578 unsigned i;
11579 rtx t0, t1, mask, x;
11580 bool ok;
11582 if (d->vmode != V4HImode || d->one_operand_p)
11583 return false;
11584 if (d->testing_p)
11585 return true;
11587 for (i = 0; i < 4; ++i)
11589 perm2[i] = d->perm[i] & 3;
11590 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11592 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11593 mask = force_reg (V4HImode, mask);
11595 t0 = gen_reg_rtx (V4HImode);
11596 t1 = gen_reg_rtx (V4HImode);
11598 ok = expand_vselect (t0, d->op0, perm2, 4);
11599 gcc_assert (ok);
11600 ok = expand_vselect (t1, d->op1, perm2, 4);
11601 gcc_assert (ok);
11603 x = gen_rtx_AND (V4HImode, mask, t0);
11604 emit_insn (gen_rtx_SET (t0, x));
11606 x = gen_rtx_NOT (V4HImode, mask);
11607 x = gen_rtx_AND (V4HImode, x, t1);
11608 emit_insn (gen_rtx_SET (t1, x));
11610 x = gen_rtx_IOR (V4HImode, t0, t1);
11611 emit_insn (gen_rtx_SET (d->target, x));
11613 return true;
11616 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11617 With all of the interface bits taken care of, perform the expansion
11618 in D and return true on success. */
static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }
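  /* WHICH is now a two-bit mask: bit 0 is set if any element is taken
     from OP0 and bit 1 if any element is taken from OP1.  */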
  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
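  /* Since TESTING_P is set, only placeholder operands are needed: raw
     registers numbered past LAST_VIRTUAL_REGISTER stand in for real
     pseudos, and the start/end_sequence pair below discards whatever RTL
     the trial expansion emits.  */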
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;
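  /* Pack the new SFmode value into a fresh V2SFmode register and build a
     two-operand permutation that keeps the untouched element of the
     destination and takes the replacement element from that register, so
     only element WHICH of the destination changes.  */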
  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;
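  /* Select every second element of the concatenation { OP0, OP1 },
     starting at index ODD: 0 yields the even-numbered elements and 1 the
     odd-numbered ones.  */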
  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"