2014-12-19 Andrew MacLeod <amacleod@redhat.com>
[official-gcc.git] / gcc / config / ia64 / ia64.c
blob36f99d2c5e5244a65d73fcd3baa5a3a0799b41c5
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2014 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "stringpool.h"
29 #include "stor-layout.h"
30 #include "calls.h"
31 #include "varasm.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "insn-codes.h"
42 #include "optabs.h"
43 #include "except.h"
44 #include "hashtab.h"
45 #include "hash-set.h"
46 #include "vec.h"
47 #include "machmode.h"
48 #include "input.h"
49 #include "function.h"
50 #include "ggc.h"
51 #include "predict.h"
52 #include "dominance.h"
53 #include "cfg.h"
54 #include "cfgrtl.h"
55 #include "cfganal.h"
56 #include "lcm.h"
57 #include "cfgbuild.h"
58 #include "cfgcleanup.h"
59 #include "basic-block.h"
60 #include "libfuncs.h"
61 #include "diagnostic-core.h"
62 #include "sched-int.h"
63 #include "timevar.h"
64 #include "target.h"
65 #include "target-def.h"
66 #include "common/common-target.h"
67 #include "tm_p.h"
68 #include "hash-table.h"
69 #include "langhooks.h"
70 #include "tree-ssa-alias.h"
71 #include "internal-fn.h"
72 #include "gimple-fold.h"
73 #include "tree-eh.h"
74 #include "gimple-expr.h"
75 #include "is-a.h"
76 #include "gimple.h"
77 #include "gimplify.h"
78 #include "intl.h"
79 #include "df.h"
80 #include "debug.h"
81 #include "params.h"
82 #include "dbgcnt.h"
83 #include "tm-constrs.h"
84 #include "sel-sched.h"
85 #include "reload.h"
86 #include "opts.h"
87 #include "dumpfile.h"
88 #include "builtins.h"
90 /* This is used for communication between ASM_OUTPUT_LABEL and
91 ASM_OUTPUT_LABELREF. */
92 int ia64_asm_output_label = 0;
94 /* Register names for ia64_expand_prologue. */
95 static const char * const ia64_reg_numbers[96] =
96 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
97 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
98 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
99 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
100 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
101 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
102 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
103 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
104 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
105 "r104","r105","r106","r107","r108","r109","r110","r111",
106 "r112","r113","r114","r115","r116","r117","r118","r119",
107 "r120","r121","r122","r123","r124","r125","r126","r127"};
109 /* ??? These strings could be shared with REGISTER_NAMES. */
110 static const char * const ia64_input_reg_names[8] =
111 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
113 /* ??? These strings could be shared with REGISTER_NAMES. */
114 static const char * const ia64_local_reg_names[80] =
115 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
116 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
117 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
118 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
119 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
120 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
121 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
122 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
123 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
124 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
126 /* ??? These strings could be shared with REGISTER_NAMES. */
127 static const char * const ia64_output_reg_names[8] =
128 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
130 /* Variables which are this size or smaller are put in the sdata/sbss
131 sections. */
133 unsigned int ia64_section_threshold;
135 /* The following variable is used by the DFA insn scheduler. The value is
136 TRUE if we do insn bundling instead of insn scheduling. */
137 int bundling_p = 0;
139 enum ia64_frame_regs
141 reg_fp,
142 reg_save_b0,
143 reg_save_pr,
144 reg_save_ar_pfs,
145 reg_save_ar_unat,
146 reg_save_ar_lc,
147 reg_save_gp,
148 number_of_ia64_frame_regs
151 /* Structure to be filled in by ia64_compute_frame_size with register
152 save masks and offsets for the current function. */
154 struct ia64_frame_info
156 HOST_WIDE_INT total_size; /* size of the stack frame, not including
157 the caller's scratch area. */
158 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
159 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
160 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
161 HARD_REG_SET mask; /* mask of saved registers. */
162 unsigned int gr_used_mask; /* mask of registers in use as gr spill
163 registers or long-term scratches. */
164 int n_spilled; /* number of spilled registers. */
165 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
166 int n_input_regs; /* number of input registers used. */
167 int n_local_regs; /* number of local registers used. */
168 int n_output_regs; /* number of output registers used. */
169 int n_rotate_regs; /* number of rotating registers used. */
171 char need_regstk; /* true if a .regstk directive needed. */
172 char initialized; /* true if the data is finalized. */
175 /* Current frame information calculated by ia64_compute_frame_size. */
176 static struct ia64_frame_info current_frame_info;
177 /* The actual registers that are emitted. */
178 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
180 static int ia64_first_cycle_multipass_dfa_lookahead (void);
181 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
182 static void ia64_init_dfa_pre_cycle_insn (void);
183 static rtx ia64_dfa_pre_cycle_insn (void);
184 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
185 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
186 static void ia64_h_i_d_extended (void);
187 static void * ia64_alloc_sched_context (void);
188 static void ia64_init_sched_context (void *, bool);
189 static void ia64_set_sched_context (void *);
190 static void ia64_clear_sched_context (void *);
191 static void ia64_free_sched_context (void *);
192 static int ia64_mode_to_int (machine_mode);
193 static void ia64_set_sched_flags (spec_info_t);
194 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
195 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
196 static bool ia64_skip_rtx_p (const_rtx);
197 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
198 static bool ia64_needs_block_p (ds_t);
199 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
200 static int ia64_spec_check_p (rtx);
201 static int ia64_spec_check_src_p (rtx);
202 static rtx gen_tls_get_addr (void);
203 static rtx gen_thread_pointer (void);
204 static int find_gr_spill (enum ia64_frame_regs, int);
205 static int next_scratch_gr_reg (void);
206 static void mark_reg_gr_used_mask (rtx, void *);
207 static void ia64_compute_frame_size (HOST_WIDE_INT);
208 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
209 static void finish_spill_pointers (void);
210 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
211 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
212 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
213 static rtx gen_movdi_x (rtx, rtx, rtx);
214 static rtx gen_fr_spill_x (rtx, rtx, rtx);
215 static rtx gen_fr_restore_x (rtx, rtx, rtx);
217 static void ia64_option_override (void);
218 static bool ia64_can_eliminate (const int, const int);
219 static machine_mode hfa_element_mode (const_tree, bool);
220 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
221 tree, int *, int);
222 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
223 tree, bool);
224 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
225 const_tree, bool, bool);
226 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
227 const_tree, bool);
228 static rtx ia64_function_incoming_arg (cumulative_args_t,
229 machine_mode, const_tree, bool);
230 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
231 const_tree, bool);
232 static unsigned int ia64_function_arg_boundary (machine_mode,
233 const_tree);
234 static bool ia64_function_ok_for_sibcall (tree, tree);
235 static bool ia64_return_in_memory (const_tree, const_tree);
236 static rtx ia64_function_value (const_tree, const_tree, bool);
237 static rtx ia64_libcall_value (machine_mode, const_rtx);
238 static bool ia64_function_value_regno_p (const unsigned int);
239 static int ia64_register_move_cost (machine_mode, reg_class_t,
240 reg_class_t);
241 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
242 bool);
243 static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
244 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
245 static void fix_range (const char *);
246 static struct machine_function * ia64_init_machine_status (void);
247 static void emit_insn_group_barriers (FILE *);
248 static void emit_all_insn_group_barriers (FILE *);
249 static void final_emit_insn_group_barriers (FILE *);
250 static void emit_predicate_relation_info (void);
251 static void ia64_reorg (void);
252 static bool ia64_in_small_data_p (const_tree);
253 static void process_epilogue (FILE *, rtx, bool, bool);
255 static bool ia64_assemble_integer (rtx, unsigned int, int);
256 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
257 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
258 static void ia64_output_function_end_prologue (FILE *);
260 static void ia64_print_operand (FILE *, rtx, int);
261 static void ia64_print_operand_address (FILE *, rtx);
262 static bool ia64_print_operand_punct_valid_p (unsigned char code);
264 static int ia64_issue_rate (void);
265 static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
266 static void ia64_sched_init (FILE *, int, int);
267 static void ia64_sched_init_global (FILE *, int, int);
268 static void ia64_sched_finish_global (FILE *, int);
269 static void ia64_sched_finish (FILE *, int);
270 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
271 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
272 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
273 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
275 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
276 static void ia64_asm_emit_except_personality (rtx);
277 static void ia64_asm_init_sections (void);
279 static enum unwind_info_type ia64_debug_unwind_info (void);
281 static struct bundle_state *get_free_bundle_state (void);
282 static void free_bundle_state (struct bundle_state *);
283 static void initiate_bundle_states (void);
284 static void finish_bundle_states (void);
285 static int insert_bundle_state (struct bundle_state *);
286 static void initiate_bundle_state_table (void);
287 static void finish_bundle_state_table (void);
288 static int try_issue_nops (struct bundle_state *, int);
289 static int try_issue_insn (struct bundle_state *, rtx);
290 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
291 int, int);
292 static int get_max_pos (state_t);
293 static int get_template (state_t, int);
295 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
296 static bool important_for_bundling_p (rtx_insn *);
297 static bool unknown_for_bundling_p (rtx_insn *);
298 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
300 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
301 HOST_WIDE_INT, tree);
302 static void ia64_file_start (void);
303 static void ia64_globalize_decl_name (FILE *, tree);
305 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
306 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
307 static section *ia64_select_rtx_section (machine_mode, rtx,
308 unsigned HOST_WIDE_INT);
309 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
310 ATTRIBUTE_UNUSED;
311 static unsigned int ia64_section_type_flags (tree, const char *, int);
312 static void ia64_init_libfuncs (void)
313 ATTRIBUTE_UNUSED;
314 static void ia64_hpux_init_libfuncs (void)
315 ATTRIBUTE_UNUSED;
316 static void ia64_sysv4_init_libfuncs (void)
317 ATTRIBUTE_UNUSED;
318 static void ia64_vms_init_libfuncs (void)
319 ATTRIBUTE_UNUSED;
320 static void ia64_soft_fp_init_libfuncs (void)
321 ATTRIBUTE_UNUSED;
322 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
323 ATTRIBUTE_UNUSED;
324 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
325 ATTRIBUTE_UNUSED;
327 static bool ia64_attribute_takes_identifier_p (const_tree);
328 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
329 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
330 static void ia64_encode_section_info (tree, rtx, int);
331 static rtx ia64_struct_value_rtx (tree, int);
332 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
333 static bool ia64_scalar_mode_supported_p (machine_mode mode);
334 static bool ia64_vector_mode_supported_p (machine_mode mode);
335 static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
336 static bool ia64_legitimate_constant_p (machine_mode, rtx);
337 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
338 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
339 static const char *ia64_mangle_type (const_tree);
340 static const char *ia64_invalid_conversion (const_tree, const_tree);
341 static const char *ia64_invalid_unary_op (int, const_tree);
342 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
343 static machine_mode ia64_c_mode_for_suffix (char);
344 static void ia64_trampoline_init (rtx, tree, rtx);
345 static void ia64_override_options_after_change (void);
346 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
348 static tree ia64_builtin_decl (unsigned, bool);
350 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
351 static machine_mode ia64_get_reg_raw_mode (int regno);
352 static section * ia64_hpux_function_section (tree, enum node_frequency,
353 bool, bool);
355 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
356 const unsigned char *sel);
358 #define MAX_VECT_LEN 8
360 struct expand_vec_perm_d
362 rtx target, op0, op1;
363 unsigned char perm[MAX_VECT_LEN];
364 machine_mode vmode;
365 unsigned char nelt;
366 bool one_operand_p;
367 bool testing_p;
370 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
373 /* Table of valid machine attributes. */
374 static const struct attribute_spec ia64_attribute_table[] =
376 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, decl_handler,
377 type_handler, affects_type_identity } */
378 { "syscall_linkage", 0, 0, false, true, true, NULL, NULL, false },
379 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
380 NULL, false },
381 #if TARGET_ABI_OPEN_VMS
382 { "common_object", 1, 1, true, false, false,
383 ia64_vms_common_object_attribute, NULL, false },
384 #endif
385 { "version_id", 1, 1, true, false, false,
386 ia64_handle_version_id_attribute, NULL, false },
387 { NULL, 0, 0, false, false, false, NULL, NULL, false }
390 /* Initialize the GCC target structure. */
391 #undef TARGET_ATTRIBUTE_TABLE
392 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS ia64_init_builtins
397 #undef TARGET_EXPAND_BUILTIN
398 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
400 #undef TARGET_BUILTIN_DECL
401 #define TARGET_BUILTIN_DECL ia64_builtin_decl
403 #undef TARGET_ASM_BYTE_OP
404 #define TARGET_ASM_BYTE_OP "\tdata1\t"
405 #undef TARGET_ASM_ALIGNED_HI_OP
406 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
409 #undef TARGET_ASM_ALIGNED_DI_OP
410 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
411 #undef TARGET_ASM_UNALIGNED_HI_OP
412 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
413 #undef TARGET_ASM_UNALIGNED_SI_OP
414 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
415 #undef TARGET_ASM_UNALIGNED_DI_OP
416 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
417 #undef TARGET_ASM_INTEGER
418 #define TARGET_ASM_INTEGER ia64_assemble_integer
420 #undef TARGET_OPTION_OVERRIDE
421 #define TARGET_OPTION_OVERRIDE ia64_option_override
423 #undef TARGET_ASM_FUNCTION_PROLOGUE
424 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
425 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
426 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
430 #undef TARGET_PRINT_OPERAND
431 #define TARGET_PRINT_OPERAND ia64_print_operand
432 #undef TARGET_PRINT_OPERAND_ADDRESS
433 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
434 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
435 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
437 #undef TARGET_IN_SMALL_DATA_P
438 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
440 #undef TARGET_SCHED_ADJUST_COST_2
441 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
442 #undef TARGET_SCHED_ISSUE_RATE
443 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
444 #undef TARGET_SCHED_VARIABLE_ISSUE
445 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
446 #undef TARGET_SCHED_INIT
447 #define TARGET_SCHED_INIT ia64_sched_init
448 #undef TARGET_SCHED_FINISH
449 #define TARGET_SCHED_FINISH ia64_sched_finish
450 #undef TARGET_SCHED_INIT_GLOBAL
451 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
452 #undef TARGET_SCHED_FINISH_GLOBAL
453 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
454 #undef TARGET_SCHED_REORDER
455 #define TARGET_SCHED_REORDER ia64_sched_reorder
456 #undef TARGET_SCHED_REORDER2
457 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
459 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
460 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
462 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
463 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
465 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
466 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
467 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
468 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
470 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
471 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
472 ia64_first_cycle_multipass_dfa_lookahead_guard
474 #undef TARGET_SCHED_DFA_NEW_CYCLE
475 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
477 #undef TARGET_SCHED_H_I_D_EXTENDED
478 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
480 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
481 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
483 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
484 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
486 #undef TARGET_SCHED_SET_SCHED_CONTEXT
487 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
489 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
490 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
492 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
493 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
495 #undef TARGET_SCHED_SET_SCHED_FLAGS
496 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
498 #undef TARGET_SCHED_GET_INSN_SPEC_DS
499 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
501 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
502 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
504 #undef TARGET_SCHED_SPECULATE_INSN
505 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
507 #undef TARGET_SCHED_NEEDS_BLOCK_P
508 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
510 #undef TARGET_SCHED_GEN_SPEC_CHECK
511 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
513 #undef TARGET_SCHED_SKIP_RTX_P
514 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
516 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
517 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG ia64_function_arg
522 #undef TARGET_FUNCTION_INCOMING_ARG
523 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
524 #undef TARGET_FUNCTION_ARG_ADVANCE
525 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
526 #undef TARGET_FUNCTION_ARG_BOUNDARY
527 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
529 #undef TARGET_ASM_OUTPUT_MI_THUNK
530 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
531 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
532 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
534 #undef TARGET_ASM_FILE_START
535 #define TARGET_ASM_FILE_START ia64_file_start
537 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
538 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
540 #undef TARGET_REGISTER_MOVE_COST
541 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
542 #undef TARGET_MEMORY_MOVE_COST
543 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
544 #undef TARGET_RTX_COSTS
545 #define TARGET_RTX_COSTS ia64_rtx_costs
546 #undef TARGET_ADDRESS_COST
547 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
549 #undef TARGET_UNSPEC_MAY_TRAP_P
550 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
552 #undef TARGET_MACHINE_DEPENDENT_REORG
553 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
555 #undef TARGET_ENCODE_SECTION_INFO
556 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
558 #undef TARGET_SECTION_TYPE_FLAGS
559 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
561 #ifdef HAVE_AS_TLS
562 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
563 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
564 #endif
566 /* ??? Investigate. */
567 #if 0
568 #undef TARGET_PROMOTE_PROTOTYPES
569 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
570 #endif
572 #undef TARGET_FUNCTION_VALUE
573 #define TARGET_FUNCTION_VALUE ia64_function_value
574 #undef TARGET_LIBCALL_VALUE
575 #define TARGET_LIBCALL_VALUE ia64_libcall_value
576 #undef TARGET_FUNCTION_VALUE_REGNO_P
577 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
579 #undef TARGET_STRUCT_VALUE_RTX
580 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
581 #undef TARGET_RETURN_IN_MEMORY
582 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
583 #undef TARGET_SETUP_INCOMING_VARARGS
584 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
585 #undef TARGET_STRICT_ARGUMENT_NAMING
586 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
587 #undef TARGET_MUST_PASS_IN_STACK
588 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
589 #undef TARGET_GET_RAW_RESULT_MODE
590 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
591 #undef TARGET_GET_RAW_ARG_MODE
592 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
594 #undef TARGET_MEMBER_TYPE_FORCES_BLK
595 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
597 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
598 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
600 #undef TARGET_ASM_UNWIND_EMIT
601 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
602 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
603 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
604 #undef TARGET_ASM_INIT_SECTIONS
605 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
607 #undef TARGET_DEBUG_UNWIND_INFO
608 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
610 #undef TARGET_SCALAR_MODE_SUPPORTED_P
611 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
612 #undef TARGET_VECTOR_MODE_SUPPORTED_P
613 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
615 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
616 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
617 ia64_libgcc_floating_mode_supported_p
619 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
620 in an order different from the specified program order. */
621 #undef TARGET_RELAXED_ORDERING
622 #define TARGET_RELAXED_ORDERING true
624 #undef TARGET_LEGITIMATE_CONSTANT_P
625 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
626 #undef TARGET_LEGITIMATE_ADDRESS_P
627 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
632 #undef TARGET_MANGLE_TYPE
633 #define TARGET_MANGLE_TYPE ia64_mangle_type
635 #undef TARGET_INVALID_CONVERSION
636 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
637 #undef TARGET_INVALID_UNARY_OP
638 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
639 #undef TARGET_INVALID_BINARY_OP
640 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
642 #undef TARGET_C_MODE_FOR_SUFFIX
643 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
648 #undef TARGET_TRAMPOLINE_INIT
649 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
651 #undef TARGET_CAN_USE_DOLOOP_P
652 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
653 #undef TARGET_INVALID_WITHIN_DOLOOP
654 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
656 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
657 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
659 #undef TARGET_PREFERRED_RELOAD_CLASS
660 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
662 #undef TARGET_DELAY_SCHED2
663 #define TARGET_DELAY_SCHED2 true
665 /* Variable tracking should be run after all optimizations which
666 change order of insns. It also needs a valid CFG. */
667 #undef TARGET_DELAY_VARTRACK
668 #define TARGET_DELAY_VARTRACK true
670 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
671 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
673 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
674 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
676 struct gcc_target targetm = TARGET_INITIALIZER;
678 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
679 identifier as an argument, so the front end shouldn't look it up. */
681 static bool
682 ia64_attribute_takes_identifier_p (const_tree attr_id)
684 if (is_attribute_p ("model", attr_id))
685 return true;
686 #if TARGET_ABI_OPEN_VMS
687 if (is_attribute_p ("common_object", attr_id))
688 return true;
689 #endif
690 return false;
693 typedef enum
695 ADDR_AREA_NORMAL, /* normal address area */
696 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
698 ia64_addr_area;
700 static GTY(()) tree small_ident1;
701 static GTY(()) tree small_ident2;
703 static void
704 init_idents (void)
706 if (small_ident1 == 0)
708 small_ident1 = get_identifier ("small");
709 small_ident2 = get_identifier ("__small__");
713 /* Retrieve the address area that has been chosen for the given decl. */
715 static ia64_addr_area
716 ia64_get_addr_area (tree decl)
718 tree model_attr;
720 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
721 if (model_attr)
723 tree id;
725 init_idents ();
726 id = TREE_VALUE (TREE_VALUE (model_attr));
727 if (id == small_ident1 || id == small_ident2)
728 return ADDR_AREA_SMALL;
730 return ADDR_AREA_NORMAL;
733 static tree
734 ia64_handle_model_attribute (tree *node, tree name, tree args,
735 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
737 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
738 ia64_addr_area area;
739 tree arg, decl = *node;
741 init_idents ();
742 arg = TREE_VALUE (args);
743 if (arg == small_ident1 || arg == small_ident2)
745 addr_area = ADDR_AREA_SMALL;
747 else
749 warning (OPT_Wattributes, "invalid argument of %qE attribute",
750 name);
751 *no_add_attrs = true;
754 switch (TREE_CODE (decl))
756 case VAR_DECL:
757 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
758 == FUNCTION_DECL)
759 && !TREE_STATIC (decl))
761 error_at (DECL_SOURCE_LOCATION (decl),
762 "an address area attribute cannot be specified for "
763 "local variables");
764 *no_add_attrs = true;
766 area = ia64_get_addr_area (decl);
767 if (area != ADDR_AREA_NORMAL && addr_area != area)
769 error ("address area of %q+D conflicts with previous "
770 "declaration", decl);
771 *no_add_attrs = true;
773 break;
775 case FUNCTION_DECL:
776 error_at (DECL_SOURCE_LOCATION (decl),
777 "address area attribute cannot be specified for "
778 "functions");
779 *no_add_attrs = true;
780 break;
782 default:
783 warning (OPT_Wattributes, "%qE attribute ignored",
784 name);
785 *no_add_attrs = true;
786 break;
789 return NULL_TREE;
792 /* Part of the low level implementation of DEC Ada pragma Common_Object which
793 enables the shared use of variables stored in overlaid linker areas
794 corresponding to the use of Fortran COMMON. */
796 static tree
797 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
798 int flags ATTRIBUTE_UNUSED,
799 bool *no_add_attrs)
801 tree decl = *node;
802 tree id;
804 gcc_assert (DECL_P (decl));
806 DECL_COMMON (decl) = 1;
807 id = TREE_VALUE (args);
808 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
810 error ("%qE attribute requires a string constant argument", name);
811 *no_add_attrs = true;
812 return NULL_TREE;
814 return NULL_TREE;
817 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
819 void
820 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
821 unsigned HOST_WIDE_INT size,
822 unsigned int align)
824 tree attr = DECL_ATTRIBUTES (decl);
826 if (attr)
827 attr = lookup_attribute ("common_object", attr);
828 if (attr)
830 tree id = TREE_VALUE (TREE_VALUE (attr));
831 const char *name;
833 if (TREE_CODE (id) == IDENTIFIER_NODE)
834 name = IDENTIFIER_POINTER (id);
835 else if (TREE_CODE (id) == STRING_CST)
836 name = TREE_STRING_POINTER (id);
837 else
838 abort ();
840 fprintf (file, "\t.vms_common\t\"%s\",", name);
842 else
843 fprintf (file, "%s", COMMON_ASM_OP);
845 /* Code from elfos.h. */
846 assemble_name (file, name);
847 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u",
848 size, align / BITS_PER_UNIT);
850 fputc ('\n', file);
853 static void
854 ia64_encode_addr_area (tree decl, rtx symbol)
856 int flags;
858 flags = SYMBOL_REF_FLAGS (symbol);
859 switch (ia64_get_addr_area (decl))
861 case ADDR_AREA_NORMAL: break;
862 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
863 default: gcc_unreachable ();
865 SYMBOL_REF_FLAGS (symbol) = flags;
868 static void
869 ia64_encode_section_info (tree decl, rtx rtl, int first)
871 default_encode_section_info (decl, rtl, first);
873 /* Careful not to prod global register variables. */
874 if (TREE_CODE (decl) == VAR_DECL
875 && GET_CODE (DECL_RTL (decl)) == MEM
876 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
877 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
878 ia64_encode_addr_area (decl, XEXP (rtl, 0));
881 /* Return 1 if the operands of a move are ok. */
884 ia64_move_ok (rtx dst, rtx src)
886 /* If we're under init_recog_no_volatile, we'll not be able to use
887 memory_operand. So check the code directly and don't worry about
888 the validity of the underlying address, which should have been
889 checked elsewhere anyway. */
890 if (GET_CODE (dst) != MEM)
891 return 1;
892 if (GET_CODE (src) == MEM)
893 return 0;
894 if (register_operand (src, VOIDmode))
895 return 1;
897 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
898 if (INTEGRAL_MODE_P (GET_MODE (dst)))
899 return src == const0_rtx;
900 else
901 return satisfies_constraint_G (src);
904 /* Return 1 if the operands are ok for a floating point load pair. */
907 ia64_load_pair_ok (rtx dst, rtx src)
909 /* ??? There is a thinko in the implementation of the "x" constraint and the
910 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
911 also return false for it. */
912 if (GET_CODE (dst) != REG
913 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
914 return 0;
915 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
916 return 0;
917 switch (GET_CODE (XEXP (src, 0)))
919 case REG:
920 case POST_INC:
921 break;
922 case POST_DEC:
923 return 0;
924 case POST_MODIFY:
926 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
928 if (GET_CODE (adjust) != CONST_INT
929 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
930 return 0;
932 break;
933 default:
934 abort ();
936 return 1;
940 addp4_optimize_ok (rtx op1, rtx op2)
942 return (basereg_operand (op1, GET_MODE(op1)) !=
943 basereg_operand (op2, GET_MODE(op2)));
946 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
947 Return the length of the field, or <= 0 on failure. */
950 ia64_depz_field_mask (rtx rop, rtx rshift)
952 unsigned HOST_WIDE_INT op = INTVAL (rop);
953 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
955 /* Get rid of the zero bits we're shifting in. */
956 op >>= shift;
958 /* We must now have a solid block of 1's at bit 0. */
959 return exact_log2 (op + 1);
962 /* Return the TLS model to use for ADDR. */
964 static enum tls_model
965 tls_symbolic_operand_type (rtx addr)
967 enum tls_model tls_kind = TLS_MODEL_NONE;
969 if (GET_CODE (addr) == CONST)
971 if (GET_CODE (XEXP (addr, 0)) == PLUS
972 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
973 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
975 else if (GET_CODE (addr) == SYMBOL_REF)
976 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
978 return tls_kind;
981 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
982 as a base register. */
984 static inline bool
985 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
987 if (strict
988 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
989 return true;
990 else if (!strict
991 && (GENERAL_REGNO_P (REGNO (reg))
992 || !HARD_REGISTER_P (reg)))
993 return true;
994 else
995 return false;
998 static bool
999 ia64_legitimate_address_reg (const_rtx reg, bool strict)
1001 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1002 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1003 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1004 return true;
1006 return false;
1009 static bool
1010 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1012 if (GET_CODE (disp) == PLUS
1013 && rtx_equal_p (reg, XEXP (disp, 0))
1014 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1015 || (CONST_INT_P (XEXP (disp, 1))
1016 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1017 return true;
1019 return false;
1022 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1024 static bool
1025 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1026 rtx x, bool strict)
1028 if (ia64_legitimate_address_reg (x, strict))
1029 return true;
1030 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1031 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1032 && XEXP (x, 0) != arg_pointer_rtx)
1033 return true;
1034 else if (GET_CODE (x) == POST_MODIFY
1035 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1036 && XEXP (x, 0) != arg_pointer_rtx
1037 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1038 return true;
1039 else
1040 return false;
1043 /* Return true if X is a constant that is valid for some immediate
1044 field in an instruction. */
1046 static bool
1047 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1049 switch (GET_CODE (x))
1051 case CONST_INT:
1052 case LABEL_REF:
1053 return true;
1055 case CONST_DOUBLE:
1056 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1057 return true;
1058 return satisfies_constraint_G (x);
1060 case CONST:
1061 case SYMBOL_REF:
1062 /* ??? Short term workaround for PR 28490. We must make the code here
1063 match the code in ia64_expand_move and move_operand, even though they
1064 are both technically wrong. */
1065 if (tls_symbolic_operand_type (x) == 0)
1067 HOST_WIDE_INT addend = 0;
1068 rtx op = x;
1070 if (GET_CODE (op) == CONST
1071 && GET_CODE (XEXP (op, 0)) == PLUS
1072 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1074 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1075 op = XEXP (XEXP (op, 0), 0);
1078 if (any_offset_symbol_operand (op, mode)
1079 || function_operand (op, mode))
1080 return true;
1081 if (aligned_offset_symbol_operand (op, mode))
1082 return (addend & 0x3fff) == 0;
1083 return false;
1085 return false;
1087 case CONST_VECTOR:
1088 if (mode == V2SFmode)
1089 return satisfies_constraint_Y (x);
1091 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1092 && GET_MODE_SIZE (mode) <= 8);
1094 default:
1095 return false;
1099 /* Don't allow TLS addresses to get spilled to memory. */
1101 static bool
1102 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1104 if (mode == RFmode)
1105 return true;
1106 return tls_symbolic_operand_type (x) != 0;
1109 /* Expand a symbolic constant load. */
1111 bool
1112 ia64_expand_load_address (rtx dest, rtx src)
1114 gcc_assert (GET_CODE (dest) == REG);
1116 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1117 having to pointer-extend the value afterward. Other forms of address
1118 computation below are also more natural to compute as 64-bit quantities.
1119 If we've been given an SImode destination register, change it. */
1120 if (GET_MODE (dest) != Pmode)
1121 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1122 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1124 if (TARGET_NO_PIC)
1125 return false;
1126 if (small_addr_symbolic_operand (src, VOIDmode))
1127 return false;
1129 if (TARGET_AUTO_PIC)
1130 emit_insn (gen_load_gprel64 (dest, src));
1131 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1132 emit_insn (gen_load_fptr (dest, src));
1133 else if (sdata_symbolic_operand (src, VOIDmode))
1134 emit_insn (gen_load_gprel (dest, src));
1135 else
1137 HOST_WIDE_INT addend = 0;
1138 rtx tmp;
1140 /* We did split constant offsets in ia64_expand_move, and we did try
1141 to keep them split in move_operand, but we also allowed reload to
1142 rematerialize arbitrary constants rather than spill the value to
1143 the stack and reload it. So we have to be prepared here to split
1144 them apart again. */
1145 if (GET_CODE (src) == CONST)
1147 HOST_WIDE_INT hi, lo;
1149 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1150 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1151 hi = hi - lo;
1153 if (lo != 0)
1155 addend = lo;
1156 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1160 tmp = gen_rtx_HIGH (Pmode, src);
1161 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1162 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1164 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1165 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1167 if (addend)
1169 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1170 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1174 return true;
1177 static GTY(()) rtx gen_tls_tga;
1178 static rtx
1179 gen_tls_get_addr (void)
1181 if (!gen_tls_tga)
1182 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1183 return gen_tls_tga;
1186 static GTY(()) rtx thread_pointer_rtx;
1187 static rtx
1188 gen_thread_pointer (void)
1190 if (!thread_pointer_rtx)
1191 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1192 return thread_pointer_rtx;
1195 static rtx
1196 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1197 rtx orig_op1, HOST_WIDE_INT addend)
1199 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1200 rtx_insn *insns;
1201 rtx orig_op0 = op0;
1202 HOST_WIDE_INT addend_lo, addend_hi;
1204 switch (tls_kind)
1206 case TLS_MODEL_GLOBAL_DYNAMIC:
1207 start_sequence ();
1209 tga_op1 = gen_reg_rtx (Pmode);
1210 emit_insn (gen_load_dtpmod (tga_op1, op1));
1212 tga_op2 = gen_reg_rtx (Pmode);
1213 emit_insn (gen_load_dtprel (tga_op2, op1));
1215 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1216 LCT_CONST, Pmode, 2, tga_op1,
1217 Pmode, tga_op2, Pmode);
1219 insns = get_insns ();
1220 end_sequence ();
1222 if (GET_MODE (op0) != Pmode)
1223 op0 = tga_ret;
1224 emit_libcall_block (insns, op0, tga_ret, op1);
1225 break;
1227 case TLS_MODEL_LOCAL_DYNAMIC:
1228 /* ??? This isn't the completely proper way to do local-dynamic
1229 If the call to __tls_get_addr is used only by a single symbol,
1230 then we should (somehow) move the dtprel to the second arg
1231 to avoid the extra add. */
1232 start_sequence ();
1234 tga_op1 = gen_reg_rtx (Pmode);
1235 emit_insn (gen_load_dtpmod (tga_op1, op1));
1237 tga_op2 = const0_rtx;
1239 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1240 LCT_CONST, Pmode, 2, tga_op1,
1241 Pmode, tga_op2, Pmode);
1243 insns = get_insns ();
1244 end_sequence ();
1246 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1247 UNSPEC_LD_BASE);
1248 tmp = gen_reg_rtx (Pmode);
1249 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1251 if (!register_operand (op0, Pmode))
1252 op0 = gen_reg_rtx (Pmode);
1253 if (TARGET_TLS64)
1255 emit_insn (gen_load_dtprel (op0, op1));
1256 emit_insn (gen_adddi3 (op0, tmp, op0));
1258 else
1259 emit_insn (gen_add_dtprel (op0, op1, tmp));
1260 break;
1262 case TLS_MODEL_INITIAL_EXEC:
1263 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1264 addend_hi = addend - addend_lo;
1266 op1 = plus_constant (Pmode, op1, addend_hi);
1267 addend = addend_lo;
1269 tmp = gen_reg_rtx (Pmode);
1270 emit_insn (gen_load_tprel (tmp, op1));
1272 if (!register_operand (op0, Pmode))
1273 op0 = gen_reg_rtx (Pmode);
1274 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1275 break;
1277 case TLS_MODEL_LOCAL_EXEC:
1278 if (!register_operand (op0, Pmode))
1279 op0 = gen_reg_rtx (Pmode);
1281 op1 = orig_op1;
1282 addend = 0;
1283 if (TARGET_TLS64)
1285 emit_insn (gen_load_tprel (op0, op1));
1286 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1288 else
1289 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1290 break;
1292 default:
1293 gcc_unreachable ();
1296 if (addend)
1297 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1298 orig_op0, 1, OPTAB_DIRECT);
1299 if (orig_op0 == op0)
1300 return NULL_RTX;
1301 if (GET_MODE (orig_op0) == Pmode)
1302 return op0;
1303 return gen_lowpart (GET_MODE (orig_op0), op0);
1307 ia64_expand_move (rtx op0, rtx op1)
1309 machine_mode mode = GET_MODE (op0);
1311 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1312 op1 = force_reg (mode, op1);
1314 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1316 HOST_WIDE_INT addend = 0;
1317 enum tls_model tls_kind;
1318 rtx sym = op1;
1320 if (GET_CODE (op1) == CONST
1321 && GET_CODE (XEXP (op1, 0)) == PLUS
1322 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1324 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1325 sym = XEXP (XEXP (op1, 0), 0);
1328 tls_kind = tls_symbolic_operand_type (sym);
1329 if (tls_kind)
1330 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1332 if (any_offset_symbol_operand (sym, mode))
1333 addend = 0;
1334 else if (aligned_offset_symbol_operand (sym, mode))
1336 HOST_WIDE_INT addend_lo, addend_hi;
1338 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1339 addend_hi = addend - addend_lo;
1341 if (addend_lo != 0)
1343 op1 = plus_constant (mode, sym, addend_hi);
1344 addend = addend_lo;
1346 else
1347 addend = 0;
1349 else
1350 op1 = sym;
1352 if (reload_completed)
1354 /* We really should have taken care of this offset earlier. */
1355 gcc_assert (addend == 0);
1356 if (ia64_expand_load_address (op0, op1))
1357 return NULL_RTX;
1360 if (addend)
1362 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1364 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1366 op1 = expand_simple_binop (mode, PLUS, subtarget,
1367 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1368 if (op0 == op1)
1369 return NULL_RTX;
1373 return op1;
1376 /* Split a move from OP1 to OP0 conditional on COND. */
1378 void
1379 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1381 rtx_insn *insn, *first = get_last_insn ();
1383 emit_move_insn (op0, op1);
1385 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1386 if (INSN_P (insn))
1387 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1388 PATTERN (insn));
1391 /* Split a post-reload TImode or TFmode reference into two DImode
1392 components. This is made extra difficult by the fact that we do
1393 not get any scratch registers to work with, because reload cannot
1394 be prevented from giving us a scratch that overlaps the register
1395 pair involved. So instead, when addressing memory, we tweak the
1396 pointer register up and back down with POST_INCs. Or up and not
1397 back down when we can get away with it.
1399 REVERSED is true when the loads must be done in reversed order
1400 (high word first) for correctness. DEAD is true when the pointer
1401 dies with the second insn we generate and therefore the second
1402 address must not carry a postmodify.
1404 May return an insn which is to be emitted after the moves. */
1406 static rtx
1407 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1409 rtx fixup = 0;
1411 switch (GET_CODE (in))
1413 case REG:
1414 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1415 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1416 break;
1418 case CONST_INT:
1419 case CONST_DOUBLE:
1420 /* Cannot occur reversed. */
1421 gcc_assert (!reversed);
1423 if (GET_MODE (in) != TFmode)
1424 split_double (in, &out[0], &out[1]);
1425 else
1426 /* split_double does not understand how to split a TFmode
1427 quantity into a pair of DImode constants. */
1429 REAL_VALUE_TYPE r;
1430 unsigned HOST_WIDE_INT p[2];
1431 long l[4]; /* TFmode is 128 bits */
1433 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1434 real_to_target (l, &r, TFmode);
1436 if (FLOAT_WORDS_BIG_ENDIAN)
1438 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1439 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1441 else
1443 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1444 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1446 out[0] = GEN_INT (p[0]);
1447 out[1] = GEN_INT (p[1]);
1449 break;
1451 case MEM:
1453 rtx base = XEXP (in, 0);
1454 rtx offset;
1456 switch (GET_CODE (base))
1458 case REG:
1459 if (!reversed)
1461 out[0] = adjust_automodify_address
1462 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1463 out[1] = adjust_automodify_address
1464 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1466 else
1468 /* Reversal requires a pre-increment, which can only
1469 be done as a separate insn. */
1470 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1471 out[0] = adjust_automodify_address
1472 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1473 out[1] = adjust_address (in, DImode, 0);
1475 break;
1477 case POST_INC:
1478 gcc_assert (!reversed && !dead);
1480 /* Just do the increment in two steps. */
1481 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1482 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1483 break;
1485 case POST_DEC:
1486 gcc_assert (!reversed && !dead);
1488 /* Add 8, subtract 24. */
1489 base = XEXP (base, 0);
1490 out[0] = adjust_automodify_address
1491 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1492 out[1] = adjust_automodify_address
1493 (in, DImode,
1494 gen_rtx_POST_MODIFY (Pmode, base,
1495 plus_constant (Pmode, base, -24)),
1497 break;
1499 case POST_MODIFY:
1500 gcc_assert (!reversed && !dead);
1502 /* Extract and adjust the modification. This case is
1503 trickier than the others, because we might have an
1504 index register, or we might have a combined offset that
1505 doesn't fit a signed 9-bit displacement field. We can
1506 assume the incoming expression is already legitimate. */
1507 offset = XEXP (base, 1);
1508 base = XEXP (base, 0);
1510 out[0] = adjust_automodify_address
1511 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1513 if (GET_CODE (XEXP (offset, 1)) == REG)
1515 /* Can't adjust the postmodify to match. Emit the
1516 original, then a separate addition insn. */
1517 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1518 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1520 else
1522 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1523 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1525 /* Again the postmodify cannot be made to match,
1526 but in this case it's more efficient to get rid
1527 of the postmodify entirely and fix up with an
1528 add insn. */
1529 out[1] = adjust_automodify_address (in, DImode, base, 8);
1530 fixup = gen_adddi3
1531 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1533 else
1535 /* Combined offset still fits in the displacement field.
1536 (We cannot overflow it at the high end.) */
1537 out[1] = adjust_automodify_address
1538 (in, DImode, gen_rtx_POST_MODIFY
1539 (Pmode, base, gen_rtx_PLUS
1540 (Pmode, base,
1541 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1545 break;
1547 default:
1548 gcc_unreachable ();
1550 break;
1553 default:
1554 gcc_unreachable ();
1557 return fixup;
1560 /* Split a TImode or TFmode move instruction after reload.
1561 This is used by *movtf_internal and *movti_internal. */
1562 void
1563 ia64_split_tmode_move (rtx operands[])
1565 rtx in[2], out[2], insn;
1566 rtx fixup[2];
1567 bool dead = false;
1568 bool reversed = false;
1570 /* It is possible for reload to decide to overwrite a pointer with
1571 the value it points to. In that case we have to do the loads in
1572 the appropriate order so that the pointer is not destroyed too
1573 early. Also we must not generate a postmodify for that second
1574 load, or rws_access_regno will die. And we must not generate a
1575 postmodify for the second load if the destination register
1576 overlaps with the base register. */
1577 if (GET_CODE (operands[1]) == MEM
1578 && reg_overlap_mentioned_p (operands[0], operands[1]))
1580 rtx base = XEXP (operands[1], 0);
1581 while (GET_CODE (base) != REG)
1582 base = XEXP (base, 0);
1584 if (REGNO (base) == REGNO (operands[0]))
1585 reversed = true;
1587 if (refers_to_regno_p (REGNO (operands[0]),
1588 REGNO (operands[0])+2,
1589 base, 0))
1590 dead = true;
1592 /* Another reason to do the moves in reversed order is if the first
1593 element of the target register pair is also the second element of
1594 the source register pair. */
1595 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1596 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1597 reversed = true;
1599 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1600 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1602 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1603 if (GET_CODE (EXP) == MEM \
1604 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1605 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1606 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1607 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1609 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1610 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1611 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1613 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1614 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1615 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1617 if (fixup[0])
1618 emit_insn (fixup[0]);
1619 if (fixup[1])
1620 emit_insn (fixup[1]);
1622 #undef MAYBE_ADD_REG_INC_NOTE
1625 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1626 through memory plus an extra GR scratch register. Except that you can
1627 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1628 SECONDARY_RELOAD_CLASS, but not both.
1630 We got into problems in the first place by allowing a construct like
1631 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1632 This solution attempts to prevent this situation from occurring. When
1633 we see something like the above, we spill the inner register to memory. */
1635 static rtx
1636 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1638 if (GET_CODE (in) == SUBREG
1639 && GET_MODE (SUBREG_REG (in)) == TImode
1640 && GET_CODE (SUBREG_REG (in)) == REG)
1642 rtx memt = assign_stack_temp (TImode, 16);
1643 emit_move_insn (memt, SUBREG_REG (in));
1644 return adjust_address (memt, mode, 0);
1646 else if (force && GET_CODE (in) == REG)
1648 rtx memx = assign_stack_temp (mode, 16);
1649 emit_move_insn (memx, in);
1650 return memx;
1652 else
1653 return in;
1656 /* Expand the movxf or movrf pattern (MODE says which) with the given
1657 OPERANDS, returning true if the pattern should then invoke
1658 DONE. */
1660 bool
1661 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1663 rtx op0 = operands[0];
1665 if (GET_CODE (op0) == SUBREG)
1666 op0 = SUBREG_REG (op0);
1668 /* We must support XFmode loads into general registers for stdarg/vararg,
1669 unprototyped calls, and a rare case where a long double is passed as
1670 an argument after a float HFA fills the FP registers. We split them into
1671 DImode loads for convenience. We also need to support XFmode stores
1672 for the last case. This case does not happen for stdarg/vararg routines,
1673 because we do a block store to memory of unnamed arguments. */
1675 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1677 rtx out[2];
1679 /* We're hoping to transform everything that deals with XFmode
1680 quantities and GR registers early in the compiler. */
1681 gcc_assert (can_create_pseudo_p ());
1683 /* Struct to register can just use TImode instead. */
1684 if ((GET_CODE (operands[1]) == SUBREG
1685 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1686 || (GET_CODE (operands[1]) == REG
1687 && GR_REGNO_P (REGNO (operands[1]))))
1689 rtx op1 = operands[1];
1691 if (GET_CODE (op1) == SUBREG)
1692 op1 = SUBREG_REG (op1);
1693 else
1694 op1 = gen_rtx_REG (TImode, REGNO (op1));
1696 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1697 return true;
1700 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1702 /* Don't word-swap when reading in the constant. */
1703 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1704 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1705 0, mode));
1706 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1707 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1708 0, mode));
1709 return true;
1712 /* If the quantity is in a register not known to be GR, spill it. */
1713 if (register_operand (operands[1], mode))
1714 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1716 gcc_assert (GET_CODE (operands[1]) == MEM);
1718 /* Don't word-swap when reading in the value. */
1719 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1720 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1722 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1723 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1724 return true;
1727 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1729 /* We're hoping to transform everything that deals with XFmode
1730 quantities and GR registers early in the compiler. */
1731 gcc_assert (can_create_pseudo_p ());
1733 /* Op0 can't be a GR_REG here, as that case is handled above.
1734 If op0 is a register, then we spill op1, so that we now have a
1735 MEM operand. This requires creating an XFmode subreg of a TImode reg
1736 to force the spill. */
1737 if (register_operand (operands[0], mode))
1739 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1740 op1 = gen_rtx_SUBREG (mode, op1, 0);
1741 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1744 else
1746 rtx in[2];
1748 gcc_assert (GET_CODE (operands[0]) == MEM);
1750 /* Don't word-swap when writing out the value. */
1751 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1752 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1754 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1755 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1756 return true;
1760 if (!reload_in_progress && !reload_completed)
1762 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1764 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1766 rtx memt, memx, in = operands[1];
1767 if (CONSTANT_P (in))
1768 in = validize_mem (force_const_mem (mode, in));
1769 if (GET_CODE (in) == MEM)
1770 memt = adjust_address (in, TImode, 0);
1771 else
1773 memt = assign_stack_temp (TImode, 16);
1774 memx = adjust_address (memt, mode, 0);
1775 emit_move_insn (memx, in);
1777 emit_move_insn (op0, memt);
1778 return true;
1781 if (!ia64_move_ok (operands[0], operands[1]))
1782 operands[1] = force_reg (mode, operands[1]);
1785 return false;
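/* Illustrative note, not part of the expander above: when an XFmode or
   RFmode value lives in a general-register pair, say r14/r15 (the
   register numbers are only an example), the code above splits the move
   into two DImode accesses at byte offsets 0 and 8 of the same MEM,
   with the lower-numbered register always paired with offset 0, i.e. no
   word swapping:

       load:   r14 <- [mem+0],  r15 <- [mem+8]
       store:  [mem+0] <- r14,  [mem+8] <- r15  */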
1788 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1789 with the expression that holds the compare result (in VOIDmode). */
1791 static GTY(()) rtx cmptf_libfunc;
1793 void
1794 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1796 enum rtx_code code = GET_CODE (*expr);
1797 rtx cmp;
1799 /* If we have a BImode input, then we already have a compare result, and
1800 do not need to emit another comparison. */
1801 if (GET_MODE (*op0) == BImode)
1803 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1804 cmp = *op0;
1806 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1807 magic number as its third argument that indicates what to do.
1808 The return value is an integer to be compared against zero. */
1809 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1811 enum qfcmp_magic {
1812 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1813 QCMP_UNORD = 2,
1814 QCMP_EQ = 4,
1815 QCMP_LT = 8,
1816 QCMP_GT = 16
1818 int magic;
1819 enum rtx_code ncode;
1820 rtx ret, insns;
1822 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1823 switch (code)
1825 /* 1 = equal, 0 = not equal. Equality operators do
1826 not raise FP_INVALID when given a NaN operand. */
1827 case EQ: magic = QCMP_EQ; ncode = NE; break;
1828 case NE: magic = QCMP_EQ; ncode = EQ; break;
1829 /* isunordered() from C99. */
1830 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1831 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1832 /* Relational operators raise FP_INVALID when given
1833 a NaN operand. */
1834 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1835 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1836 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1837 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1838 /* Unordered relational operators do not raise FP_INVALID
1839 when given a NaN operand. */
1840 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1841 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1842 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1843 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1844 /* Not supported. */
1845 case UNEQ:
1846 case LTGT:
1847 default: gcc_unreachable ();
1850 start_sequence ();
1852 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1853 *op0, TFmode, *op1, TFmode,
1854 GEN_INT (magic), DImode);
1855 cmp = gen_reg_rtx (BImode);
1856 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1857 gen_rtx_fmt_ee (ncode, BImode,
1858 ret, const0_rtx)));
1860 insns = get_insns ();
1861 end_sequence ();
1863 emit_libcall_block (insns, cmp, cmp,
1864 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1865 code = NE;
1867 else
1869 cmp = gen_reg_rtx (BImode);
1870 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1871 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1872 code = NE;
1875 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1876 *op0 = cmp;
1877 *op1 = const0_rtx;
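/* Worked example for the HP-UX TFmode path above (illustration only):
   for a GE comparison the magic constant is QCMP_GT | QCMP_EQ | QCMP_INV
   = 16 + 4 + 1 = 21, and ncode is NE, so the expansion behaves like

       ret = _U_Qfcmp (*op0, *op1, 21);
       cmp = (ret != 0);

   after which the caller is handed CMP to be tested against zero
   with code NE.  */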
1880 /* Generate an integral vector comparison. Return true if the condition has
1881 been reversed, and so the sense of the comparison should be inverted. */
1883 static bool
1884 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1885 rtx dest, rtx op0, rtx op1)
1887 bool negate = false;
1888 rtx x;
1890 /* Canonicalize the comparison to EQ, GT, GTU. */
1891 switch (code)
1893 case EQ:
1894 case GT:
1895 case GTU:
1896 break;
1898 case NE:
1899 case LE:
1900 case LEU:
1901 code = reverse_condition (code);
1902 negate = true;
1903 break;
1905 case GE:
1906 case GEU:
1907 code = reverse_condition (code);
1908 negate = true;
1909 /* FALLTHRU */
1911 case LT:
1912 case LTU:
1913 code = swap_condition (code);
1914 x = op0, op0 = op1, op1 = x;
1915 break;
1917 default:
1918 gcc_unreachable ();
1921 /* Unsigned parallel compare is not supported by the hardware. Play some
1922 tricks to turn this into a signed comparison against 0. */
1923 if (code == GTU)
1925 switch (mode)
1927 case V2SImode:
1929 rtx t1, t2, mask;
1931 /* Subtract (-(INT MAX) - 1) from both operands to make
1932 them signed. */
1933 mask = GEN_INT (0x80000000);
1934 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1935 mask = force_reg (mode, mask);
1936 t1 = gen_reg_rtx (mode);
1937 emit_insn (gen_subv2si3 (t1, op0, mask));
1938 t2 = gen_reg_rtx (mode);
1939 emit_insn (gen_subv2si3 (t2, op1, mask));
1940 op0 = t1;
1941 op1 = t2;
1942 code = GT;
1944 break;
1946 case V8QImode:
1947 case V4HImode:
1948 /* Perform a parallel unsigned saturating subtraction. */
1949 x = gen_reg_rtx (mode);
1950 emit_insn (gen_rtx_SET (VOIDmode, x,
1951 gen_rtx_US_MINUS (mode, op0, op1)));
1953 code = EQ;
1954 op0 = x;
1955 op1 = CONST0_RTX (mode);
1956 negate = !negate;
1957 break;
1959 default:
1960 gcc_unreachable ();
1964 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1967 return negate;
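/* Worked example of the GTU trick above, for V2SImode (illustration
   only): after subtracting 0x80000000 from both operands, an unsigned
   comparison becomes a signed one.  E.g. op0 = 0xffffffff and
   op1 = 0x00000001 compare as op0 > op1 unsigned; after the bias,
   op0' = 0x7fffffff (signed maximum) and op1' = 0x80000001 (a large
   negative value), so the signed GT emitted above holds as well.  */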
1970 /* Emit an integral vector conditional move. */
1972 void
1973 ia64_expand_vecint_cmov (rtx operands[])
1975 machine_mode mode = GET_MODE (operands[0]);
1976 enum rtx_code code = GET_CODE (operands[3]);
1977 bool negate;
1978 rtx cmp, x, ot, of;
1980 cmp = gen_reg_rtx (mode);
1981 negate = ia64_expand_vecint_compare (code, mode, cmp,
1982 operands[4], operands[5]);
1984 ot = operands[1+negate];
1985 of = operands[2-negate];
1987 if (ot == CONST0_RTX (mode))
1989 if (of == CONST0_RTX (mode))
1991 emit_move_insn (operands[0], ot);
1992 return;
1995 x = gen_rtx_NOT (mode, cmp);
1996 x = gen_rtx_AND (mode, x, of);
1997 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1999 else if (of == CONST0_RTX (mode))
2001 x = gen_rtx_AND (mode, cmp, ot);
2002 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2004 else
2006 rtx t, f;
2008 t = gen_reg_rtx (mode);
2009 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2010 emit_insn (gen_rtx_SET (VOIDmode, t, x));
2012 f = gen_reg_rtx (mode);
2013 x = gen_rtx_NOT (mode, cmp);
2014 x = gen_rtx_AND (mode, x, operands[2-negate]);
2015 emit_insn (gen_rtx_SET (VOIDmode, f, x));
2017 x = gen_rtx_IOR (mode, t, f);
2018 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2022 /* Emit an integral vector min or max operation. Return true if all done. */
2024 bool
2025 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2026 rtx operands[])
2028 rtx xops[6];
2030 /* These four combinations are supported directly. */
2031 if (mode == V8QImode && (code == UMIN || code == UMAX))
2032 return false;
2033 if (mode == V4HImode && (code == SMIN || code == SMAX))
2034 return false;
2036 /* This combination can be implemented with only saturating subtraction. */
2037 if (mode == V4HImode && code == UMAX)
2039 rtx x, tmp = gen_reg_rtx (mode);
2041 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2042 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
2044 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2045 return true;
2048 /* Everything else implemented via vector comparisons. */
2049 xops[0] = operands[0];
2050 xops[4] = xops[1] = operands[1];
2051 xops[5] = xops[2] = operands[2];
2053 switch (code)
2055 case UMIN:
2056 code = LTU;
2057 break;
2058 case UMAX:
2059 code = GTU;
2060 break;
2061 case SMIN:
2062 code = LT;
2063 break;
2064 case SMAX:
2065 code = GT;
2066 break;
2067 default:
2068 gcc_unreachable ();
2070 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2072 ia64_expand_vecint_cmov (xops);
2073 return true;
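/* Worked example for the V4HImode UMAX shortcut above: it relies on the
   identity  umax (a, b) == sat_sub (a, b) + b  with unsigned saturating
   subtraction.  E.g. a = 7, b = 2 gives sat_sub = 5 and 5 + 2 = 7, while
   a = 2, b = 7 gives sat_sub = 0 and 0 + 7 = 7.  */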
2076 /* The vectors LO and HI each contain N halves of a double-wide vector.
2077 Reassemble either the first N/2 or the second N/2 elements. */
2079 void
2080 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2082 machine_mode vmode = GET_MODE (lo);
2083 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2084 struct expand_vec_perm_d d;
2085 bool ok;
2087 d.target = gen_lowpart (vmode, out);
2088 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2089 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2090 d.vmode = vmode;
2091 d.nelt = nelt;
2092 d.one_operand_p = false;
2093 d.testing_p = false;
2095 high = (highp ? nelt / 2 : 0);
2096 for (i = 0; i < nelt / 2; ++i)
2098 d.perm[i * 2] = i + high;
2099 d.perm[i * 2 + 1] = i + high + nelt;
2102 ok = ia64_expand_vec_perm_const_1 (&d);
2103 gcc_assert (ok);
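/* Example of the permutation built above (illustration only): for
   V8QImode, nelt is 8, so with HIGHP set we get high = 4 and

       perm = { 4, 12, 5, 13, 6, 14, 7, 15 }

   i.e. the upper halves of the two operands interleaved element by
   element; with HIGHP clear the same pattern starts at element 0.  */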
2106 /* Return a vector of the sign-extension of VEC. */
2108 static rtx
2109 ia64_unpack_sign (rtx vec, bool unsignedp)
2111 machine_mode mode = GET_MODE (vec);
2112 rtx zero = CONST0_RTX (mode);
2114 if (unsignedp)
2115 return zero;
2116 else
2118 rtx sign = gen_reg_rtx (mode);
2119 bool neg;
2121 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2122 gcc_assert (!neg);
2124 return sign;
2128 /* Emit an integral vector unpack operation. */
2130 void
2131 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2133 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2134 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2137 /* Emit an integral vector widening sum operation. */
2139 void
2140 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2142 machine_mode wmode;
2143 rtx l, h, t, sign;
2145 sign = ia64_unpack_sign (operands[1], unsignedp);
2147 wmode = GET_MODE (operands[0]);
2148 l = gen_reg_rtx (wmode);
2149 h = gen_reg_rtx (wmode);
2151 ia64_unpack_assemble (l, operands[1], sign, false);
2152 ia64_unpack_assemble (h, operands[1], sign, true);
2154 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2155 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2156 if (t != operands[0])
2157 emit_move_insn (operands[0], t);
2160 /* Emit the appropriate sequence for a call. */
2162 void
2163 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2164 int sibcall_p)
2166 rtx insn, b0;
2168 addr = XEXP (addr, 0);
2169 addr = convert_memory_address (DImode, addr);
2170 b0 = gen_rtx_REG (DImode, R_BR (0));
2172 /* ??? Should do this for functions known to bind local too. */
2173 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2175 if (sibcall_p)
2176 insn = gen_sibcall_nogp (addr);
2177 else if (! retval)
2178 insn = gen_call_nogp (addr, b0);
2179 else
2180 insn = gen_call_value_nogp (retval, addr, b0);
2181 insn = emit_call_insn (insn);
2183 else
2185 if (sibcall_p)
2186 insn = gen_sibcall_gp (addr);
2187 else if (! retval)
2188 insn = gen_call_gp (addr, b0);
2189 else
2190 insn = gen_call_value_gp (retval, addr, b0);
2191 insn = emit_call_insn (insn);
2193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2196 if (sibcall_p)
2197 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2199 if (TARGET_ABI_OPEN_VMS)
2200 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2201 gen_rtx_REG (DImode, GR_REG (25)));
2204 static void
2205 reg_emitted (enum ia64_frame_regs r)
2207 if (emitted_frame_related_regs[r] == 0)
2208 emitted_frame_related_regs[r] = current_frame_info.r[r];
2209 else
2210 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2213 static int
2214 get_reg (enum ia64_frame_regs r)
2216 reg_emitted (r);
2217 return current_frame_info.r[r];
2220 static bool
2221 is_emitted (int regno)
2223 unsigned int r;
2225 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2226 if (emitted_frame_related_regs[r] == regno)
2227 return true;
2228 return false;
2231 void
2232 ia64_reload_gp (void)
2234 rtx tmp;
2236 if (current_frame_info.r[reg_save_gp])
2238 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2240 else
2242 HOST_WIDE_INT offset;
2243 rtx offset_r;
2245 offset = (current_frame_info.spill_cfa_off
2246 + current_frame_info.spill_size);
2247 if (frame_pointer_needed)
2249 tmp = hard_frame_pointer_rtx;
2250 offset = -offset;
2252 else
2254 tmp = stack_pointer_rtx;
2255 offset = current_frame_info.total_size - offset;
2258 offset_r = GEN_INT (offset);
2259 if (satisfies_constraint_I (offset_r))
2260 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2261 else
2263 emit_move_insn (pic_offset_table_rtx, offset_r);
2264 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2265 pic_offset_table_rtx, tmp));
2268 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2271 emit_move_insn (pic_offset_table_rtx, tmp);
2274 void
2275 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2276 rtx scratch_b, int noreturn_p, int sibcall_p)
2278 rtx insn;
2279 bool is_desc = false;
2281 /* If we find we're calling through a register, then we're actually
2282 calling through a descriptor, so load up the values. */
2283 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2285 rtx tmp;
2286 bool addr_dead_p;
2288 /* ??? We are currently constrained to *not* use peep2, because
2289 we can legitimately change the global lifetime of the GP
2290 (in the form of killing where previously live). This is
2291 because a call through a descriptor doesn't use the previous
2292 value of the GP, while a direct call does, and we do not
2293 commit to either form until the split here.
2295 That said, this means that we lack precise life info for
2296 whether ADDR is dead after this call. This is not terribly
2297 important, since we can fix things up essentially for free
2298 with the POST_DEC below, but it's nice to not use it when we
2299 can immediately tell it's not necessary. */
2300 addr_dead_p = ((noreturn_p || sibcall_p
2301 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2302 REGNO (addr)))
2303 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2305 /* Load the code address into scratch_b. */
2306 tmp = gen_rtx_POST_INC (Pmode, addr);
2307 tmp = gen_rtx_MEM (Pmode, tmp);
2308 emit_move_insn (scratch_r, tmp);
2309 emit_move_insn (scratch_b, scratch_r);
2311 /* Load the GP address. If ADDR is not dead here, then we must
2312 revert the change made above via the POST_INCREMENT. */
2313 if (!addr_dead_p)
2314 tmp = gen_rtx_POST_DEC (Pmode, addr);
2315 else
2316 tmp = addr;
2317 tmp = gen_rtx_MEM (Pmode, tmp);
2318 emit_move_insn (pic_offset_table_rtx, tmp);
2320 is_desc = true;
2321 addr = scratch_b;
2324 if (sibcall_p)
2325 insn = gen_sibcall_nogp (addr);
2326 else if (retval)
2327 insn = gen_call_value_nogp (retval, addr, retaddr);
2328 else
2329 insn = gen_call_nogp (addr, retaddr);
2330 emit_call_insn (insn);
2332 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2333 ia64_reload_gp ();
2336 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2338 This differs from the generic code in that we know about the zero-extending
2339 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2340 also know that ld.acq+cmpxchg.rel equals a full barrier.
2342 The loop we want to generate looks like
2344 cmp_reg = mem;
2345 label:
2346 old_reg = cmp_reg;
2347 new_reg = cmp_reg op val;
2348 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2349 if (cmp_reg != old_reg)
2350 goto label;
2352 Note that we only do the plain load from memory once. Subsequent
2353 iterations use the value loaded by the compare-and-swap pattern. */
2355 void
2356 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2357 rtx old_dst, rtx new_dst, enum memmodel model)
2359 machine_mode mode = GET_MODE (mem);
2360 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2361 enum insn_code icode;
2363 /* Special case for using fetchadd. */
2364 if ((mode == SImode || mode == DImode)
2365 && (code == PLUS || code == MINUS)
2366 && fetchadd_operand (val, mode))
2368 if (code == MINUS)
2369 val = GEN_INT (-INTVAL (val));
2371 if (!old_dst)
2372 old_dst = gen_reg_rtx (mode);
2374 switch (model)
2376 case MEMMODEL_ACQ_REL:
2377 case MEMMODEL_SEQ_CST:
2378 emit_insn (gen_memory_barrier ());
2379 /* FALLTHRU */
2380 case MEMMODEL_RELAXED:
2381 case MEMMODEL_ACQUIRE:
2382 case MEMMODEL_CONSUME:
2383 if (mode == SImode)
2384 icode = CODE_FOR_fetchadd_acq_si;
2385 else
2386 icode = CODE_FOR_fetchadd_acq_di;
2387 break;
2388 case MEMMODEL_RELEASE:
2389 if (mode == SImode)
2390 icode = CODE_FOR_fetchadd_rel_si;
2391 else
2392 icode = CODE_FOR_fetchadd_rel_di;
2393 break;
2395 default:
2396 gcc_unreachable ();
2399 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2401 if (new_dst)
2403 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2404 true, OPTAB_WIDEN);
2405 if (new_reg != new_dst)
2406 emit_move_insn (new_dst, new_reg);
2408 return;
2411 /* Because of the volatile mem read, we get an ld.acq, which is the
2412 front half of the full barrier. The end half is the cmpxchg.rel.
2413 For relaxed and release memory models, we don't need this. But we
2414 also don't bother trying to prevent it either. */
2415 gcc_assert (model == MEMMODEL_RELAXED
2416 || model == MEMMODEL_RELEASE
2417 || MEM_VOLATILE_P (mem));
2419 old_reg = gen_reg_rtx (DImode);
2420 cmp_reg = gen_reg_rtx (DImode);
2421 label = gen_label_rtx ();
2423 if (mode != DImode)
2425 val = simplify_gen_subreg (DImode, val, mode, 0);
2426 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2428 else
2429 emit_move_insn (cmp_reg, mem);
2431 emit_label (label);
2433 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2434 emit_move_insn (old_reg, cmp_reg);
2435 emit_move_insn (ar_ccv, cmp_reg);
2437 if (old_dst)
2438 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2440 new_reg = cmp_reg;
2441 if (code == NOT)
2443 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2444 true, OPTAB_DIRECT);
2445 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2447 else
2448 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2449 true, OPTAB_DIRECT);
2451 if (mode != DImode)
2452 new_reg = gen_lowpart (mode, new_reg);
2453 if (new_dst)
2454 emit_move_insn (new_dst, new_reg);
2456 switch (model)
2458 case MEMMODEL_RELAXED:
2459 case MEMMODEL_ACQUIRE:
2460 case MEMMODEL_CONSUME:
2461 switch (mode)
2463 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2464 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2465 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2466 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2467 default:
2468 gcc_unreachable ();
2470 break;
2472 case MEMMODEL_RELEASE:
2473 case MEMMODEL_ACQ_REL:
2474 case MEMMODEL_SEQ_CST:
2475 switch (mode)
2477 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2478 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2479 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2480 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2481 default:
2482 gcc_unreachable ();
2484 break;
2486 default:
2487 gcc_unreachable ();
2490 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2492 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
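/* Note on the CODE == NOT case above (illustration only): the value
   computed in the loop is new = ~(old & val), i.e. a NAND of the old
   value with VAL as used for the atomic nand builtins, not a plain
   complement of the old value.  For example, old = 0xff and val = 0x0f
   yields new = ~0x0f = 0xff...f0 in the DImode temporary.  */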
2495 /* Begin the assembly file. */
2497 static void
2498 ia64_file_start (void)
2500 default_file_start ();
2501 emit_safe_across_calls ();
2504 void
2505 emit_safe_across_calls (void)
2507 unsigned int rs, re;
2508 int out_state;
2510 rs = 1;
2511 out_state = 0;
2512 while (1)
2514 while (rs < 64 && call_used_regs[PR_REG (rs)])
2515 rs++;
2516 if (rs >= 64)
2517 break;
2518 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2519 continue;
2520 if (out_state == 0)
2522 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2523 out_state = 1;
2525 else
2526 fputc (',', asm_out_file);
2527 if (re == rs + 1)
2528 fprintf (asm_out_file, "p%u", rs);
2529 else
2530 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2531 rs = re + 1;
2533 if (out_state)
2534 fputc ('\n', asm_out_file);
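/* Example of the directive emitted above (assuming the usual ia64
   convention that p1-p5 and p16-p63 are preserved across calls and
   p6-p15 are scratch, and that no -ffixed/-fcall-used option has
   changed this):

       .pred.safe_across_calls p1-p5,p16-p63  */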
2537 /* Globalize a declaration. */
2539 static void
2540 ia64_globalize_decl_name (FILE * stream, tree decl)
2542 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2543 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2544 if (version_attr)
2546 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2547 const char *p = TREE_STRING_POINTER (v);
2548 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2550 targetm.asm_out.globalize_label (stream, name);
2551 if (TREE_CODE (decl) == FUNCTION_DECL)
2552 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
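/* Example of the version_id handling above (the attribute value is just
   an illustration): a declaration like

       extern int foo (void) __attribute__((version_id ("20040821")));

   causes

       .alias foo#, "foo{20040821}"

   to be emitted before the label is globalized and given its .type.  */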
2555 /* Helper function for ia64_compute_frame_size: find an appropriate general
2556 register to spill some special register to. SPECIAL_SPILL_MASK contains
2557 bits in GR0 to GR31 that have already been allocated by this routine.
2558 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2560 static int
2561 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2563 int regno;
2565 if (emitted_frame_related_regs[r] != 0)
2567 regno = emitted_frame_related_regs[r];
2568 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2569 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2570 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2571 else if (crtl->is_leaf
2572 && regno >= GR_REG (1) && regno <= GR_REG (31))
2573 current_frame_info.gr_used_mask |= 1 << regno;
2575 return regno;
2578 /* If this is a leaf function, first try an otherwise unused
2579 call-clobbered register. */
2580 if (crtl->is_leaf)
2582 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2583 if (! df_regs_ever_live_p (regno)
2584 && call_used_regs[regno]
2585 && ! fixed_regs[regno]
2586 && ! global_regs[regno]
2587 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2588 && ! is_emitted (regno))
2590 current_frame_info.gr_used_mask |= 1 << regno;
2591 return regno;
2595 if (try_locals)
2597 regno = current_frame_info.n_local_regs;
2598 /* If there is a frame pointer, then we can't use loc79, because
2599 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2600 reg_name switching code in ia64_expand_prologue. */
2601 while (regno < (80 - frame_pointer_needed))
2602 if (! is_emitted (LOC_REG (regno++)))
2604 current_frame_info.n_local_regs = regno;
2605 return LOC_REG (regno - 1);
2609 /* Failed to find a general register to spill to. Must use stack. */
2610 return 0;
2613 /* In order to make for nice schedules, we try to allocate every temporary
2614 to a different register. We must of course stay away from call-saved,
2615 fixed, and global registers. We must also stay away from registers
2616 allocated in current_frame_info.gr_used_mask, since those include regs
2617 used all through the prologue.
2619 Any register allocated here must be used immediately. The idea is to
2620 aid scheduling, not to solve data flow problems. */
2622 static int last_scratch_gr_reg;
2624 static int
2625 next_scratch_gr_reg (void)
2627 int i, regno;
2629 for (i = 0; i < 32; ++i)
2631 regno = (last_scratch_gr_reg + i + 1) & 31;
2632 if (call_used_regs[regno]
2633 && ! fixed_regs[regno]
2634 && ! global_regs[regno]
2635 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2637 last_scratch_gr_reg = regno;
2638 return regno;
2642 /* There must be _something_ available. */
2643 gcc_unreachable ();
2646 /* Helper function for ia64_compute_frame_size, called through
2647 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2649 static void
2650 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2652 unsigned int regno = REGNO (reg);
2653 if (regno < 32)
2655 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2656 for (i = 0; i < n; ++i)
2657 current_frame_info.gr_used_mask |= 1 << (regno + i);
2662 /* Returns the number of bytes offset between the frame pointer and the stack
2663 pointer for the current function. SIZE is the number of bytes of space
2664 needed for local variables. */
2666 static void
2667 ia64_compute_frame_size (HOST_WIDE_INT size)
2669 HOST_WIDE_INT total_size;
2670 HOST_WIDE_INT spill_size = 0;
2671 HOST_WIDE_INT extra_spill_size = 0;
2672 HOST_WIDE_INT pretend_args_size;
2673 HARD_REG_SET mask;
2674 int n_spilled = 0;
2675 int spilled_gr_p = 0;
2676 int spilled_fr_p = 0;
2677 unsigned int regno;
2678 int min_regno;
2679 int max_regno;
2680 int i;
2682 if (current_frame_info.initialized)
2683 return;
2685 memset (&current_frame_info, 0, sizeof current_frame_info);
2686 CLEAR_HARD_REG_SET (mask);
2688 /* Don't allocate scratches to the return register. */
2689 diddle_return_value (mark_reg_gr_used_mask, NULL);
2691 /* Don't allocate scratches to the EH scratch registers. */
2692 if (cfun->machine->ia64_eh_epilogue_sp)
2693 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2694 if (cfun->machine->ia64_eh_epilogue_bsp)
2695 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2697 /* Static stack checking uses r2 and r3. */
2698 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2699 current_frame_info.gr_used_mask |= 0xc;
2701 /* Find the size of the register stack frame. We have only 80 local
2702 registers, because we reserve 8 for the inputs and 8 for the
2703 outputs. */
2705 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2706 since we'll be adjusting that down later. */
2707 regno = LOC_REG (78) + ! frame_pointer_needed;
2708 for (; regno >= LOC_REG (0); regno--)
2709 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2710 break;
2711 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2713 /* For functions marked with the syscall_linkage attribute, we must mark
2714 all eight input registers as in use, so that locals aren't visible to
2715 the caller. */
2717 if (cfun->machine->n_varargs > 0
2718 || lookup_attribute ("syscall_linkage",
2719 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2720 current_frame_info.n_input_regs = 8;
2721 else
2723 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2724 if (df_regs_ever_live_p (regno))
2725 break;
2726 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2729 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2730 if (df_regs_ever_live_p (regno))
2731 break;
2732 i = regno - OUT_REG (0) + 1;
2734 #ifndef PROFILE_HOOK
2735 /* When -p profiling, we need one output register for the mcount argument.
2736 Likewise for -a profiling for the bb_init_func argument. For -ax
2737 profiling, we need two output registers for the two bb_init_trace_func
2738 arguments. */
2739 if (crtl->profile)
2740 i = MAX (i, 1);
2741 #endif
2742 current_frame_info.n_output_regs = i;
2744 /* ??? No rotating register support yet. */
2745 current_frame_info.n_rotate_regs = 0;
2747 /* Discover which registers need spilling, and how much room that
2748 will take. Begin with floating point and general registers,
2749 which will always wind up on the stack. */
2751 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2752 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2754 SET_HARD_REG_BIT (mask, regno);
2755 spill_size += 16;
2756 n_spilled += 1;
2757 spilled_fr_p = 1;
2760 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2761 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2763 SET_HARD_REG_BIT (mask, regno);
2764 spill_size += 8;
2765 n_spilled += 1;
2766 spilled_gr_p = 1;
2769 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2770 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2772 SET_HARD_REG_BIT (mask, regno);
2773 spill_size += 8;
2774 n_spilled += 1;
2777 /* Now come all special registers that might get saved in other
2778 general registers. */
2780 if (frame_pointer_needed)
2782 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2783 /* If we did not get a register, then we take LOC79. This is guaranteed
2784 to be free, even if regs_ever_live is already set, because this is
2785 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2786 as we don't count loc79 above. */
2787 if (current_frame_info.r[reg_fp] == 0)
2789 current_frame_info.r[reg_fp] = LOC_REG (79);
2790 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2794 if (! crtl->is_leaf)
2796 /* Emit a save of BR0 if we call other functions. Do this even
2797 if this function doesn't return, as EH depends on this to be
2798 able to unwind the stack. */
2799 SET_HARD_REG_BIT (mask, BR_REG (0));
2801 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2802 if (current_frame_info.r[reg_save_b0] == 0)
2804 extra_spill_size += 8;
2805 n_spilled += 1;
2808 /* Similarly for ar.pfs. */
2809 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2810 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2811 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2813 extra_spill_size += 8;
2814 n_spilled += 1;
2817 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2818 registers are clobbered, so we fall back to the stack. */
2819 current_frame_info.r[reg_save_gp]
2820 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2821 if (current_frame_info.r[reg_save_gp] == 0)
2823 SET_HARD_REG_BIT (mask, GR_REG (1));
2824 spill_size += 8;
2825 n_spilled += 1;
2828 else
2830 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2832 SET_HARD_REG_BIT (mask, BR_REG (0));
2833 extra_spill_size += 8;
2834 n_spilled += 1;
2837 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2839 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2840 current_frame_info.r[reg_save_ar_pfs]
2841 = find_gr_spill (reg_save_ar_pfs, 1);
2842 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2844 extra_spill_size += 8;
2845 n_spilled += 1;
2850 /* Unwind descriptor hackery: things are most efficient if we allocate
2851 consecutive GR save registers for RP, PFS, FP in that order. However,
2852 it is absolutely critical that FP get the only hard register that's
2853 guaranteed to be free, so we allocated it first. If all three did
2854 happen to be allocated hard regs, and are consecutive, rearrange them
2855 into the preferred order now.
2857 If we have already emitted code for any of those registers,
2858 then it's already too late to change. */
2859 min_regno = MIN (current_frame_info.r[reg_fp],
2860 MIN (current_frame_info.r[reg_save_b0],
2861 current_frame_info.r[reg_save_ar_pfs]));
2862 max_regno = MAX (current_frame_info.r[reg_fp],
2863 MAX (current_frame_info.r[reg_save_b0],
2864 current_frame_info.r[reg_save_ar_pfs]));
2865 if (min_regno > 0
2866 && min_regno + 2 == max_regno
2867 && (current_frame_info.r[reg_fp] == min_regno + 1
2868 || current_frame_info.r[reg_save_b0] == min_regno + 1
2869 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2870 && (emitted_frame_related_regs[reg_save_b0] == 0
2871 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2872 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2873 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2874 && (emitted_frame_related_regs[reg_fp] == 0
2875 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2877 current_frame_info.r[reg_save_b0] = min_regno;
2878 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2879 current_frame_info.r[reg_fp] = min_regno + 2;
2882 /* See if we need to store the predicate register block. */
2883 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2884 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2885 break;
2886 if (regno <= PR_REG (63))
2888 SET_HARD_REG_BIT (mask, PR_REG (0));
2889 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2890 if (current_frame_info.r[reg_save_pr] == 0)
2892 extra_spill_size += 8;
2893 n_spilled += 1;
2896 /* ??? Mark them all as used so that register renaming and such
2897 are free to use them. */
2898 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2899 df_set_regs_ever_live (regno, true);
2902 /* If we're forced to use st8.spill, we're forced to save and restore
2903 ar.unat as well. The check for existing liveness allows inline asm
2904 to touch ar.unat. */
2905 if (spilled_gr_p || cfun->machine->n_varargs
2906 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2908 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2909 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2910 current_frame_info.r[reg_save_ar_unat]
2911 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2912 if (current_frame_info.r[reg_save_ar_unat] == 0)
2914 extra_spill_size += 8;
2915 n_spilled += 1;
2919 if (df_regs_ever_live_p (AR_LC_REGNUM))
2921 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2922 current_frame_info.r[reg_save_ar_lc]
2923 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2924 if (current_frame_info.r[reg_save_ar_lc] == 0)
2926 extra_spill_size += 8;
2927 n_spilled += 1;
2931 /* If we have an odd number of words of pretend arguments written to
2932 the stack, then the FR save area will be unaligned. We round the
2933 size of this area up to keep things 16 byte aligned. */
2934 if (spilled_fr_p)
2935 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2936 else
2937 pretend_args_size = crtl->args.pretend_args_size;
2939 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2940 + crtl->outgoing_args_size);
2941 total_size = IA64_STACK_ALIGN (total_size);
2943 /* We always use the 16-byte scratch area provided by the caller, but
2944 if we are a leaf function, there's no one to which we need to provide
2945 a scratch area. However, if the function allocates dynamic stack space,
2946 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2947 so we need to cope. */
2948 if (crtl->is_leaf && !cfun->calls_alloca)
2949 total_size = MAX (0, total_size - 16);
2951 current_frame_info.total_size = total_size;
2952 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2953 current_frame_info.spill_size = spill_size;
2954 current_frame_info.extra_spill_size = extra_spill_size;
2955 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2956 current_frame_info.n_spilled = n_spilled;
2957 current_frame_info.initialized = reload_completed;
2960 /* Worker function for TARGET_CAN_ELIMINATE. */
2962 bool
2963 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2965 return (to == BR_REG (0) ? crtl->is_leaf : true);
2968 /* Compute the initial difference between the specified pair of registers. */
2970 HOST_WIDE_INT
2971 ia64_initial_elimination_offset (int from, int to)
2973 HOST_WIDE_INT offset;
2975 ia64_compute_frame_size (get_frame_size ());
2976 switch (from)
2978 case FRAME_POINTER_REGNUM:
2979 switch (to)
2981 case HARD_FRAME_POINTER_REGNUM:
2982 offset = -current_frame_info.total_size;
2983 if (!crtl->is_leaf || cfun->calls_alloca)
2984 offset += 16 + crtl->outgoing_args_size;
2985 break;
2987 case STACK_POINTER_REGNUM:
2988 offset = 0;
2989 if (!crtl->is_leaf || cfun->calls_alloca)
2990 offset += 16 + crtl->outgoing_args_size;
2991 break;
2993 default:
2994 gcc_unreachable ();
2996 break;
2998 case ARG_POINTER_REGNUM:
2999 /* Arguments start above the 16 byte save area, unless stdarg,
3000 in which case we store through the 16 byte save area. */
3001 switch (to)
3003 case HARD_FRAME_POINTER_REGNUM:
3004 offset = 16 - crtl->args.pretend_args_size;
3005 break;
3007 case STACK_POINTER_REGNUM:
3008 offset = (current_frame_info.total_size
3009 + 16 - crtl->args.pretend_args_size);
3010 break;
3012 default:
3013 gcc_unreachable ();
3015 break;
3017 default:
3018 gcc_unreachable ();
3021 return offset;
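/* Examples of the offsets above (illustration only): eliminating the
   soft frame pointer to the stack pointer in a non-leaf function gives
   16 + crtl->outgoing_args_size, since only the caller scratch area and
   our outgoing argument block separate the two, whereas eliminating the
   argument pointer to the stack pointer spans the whole frame:
   total_size + 16 - pretend_args_size.  */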
3024 /* If there are more than a trivial number of register spills, we use
3025 two interleaved iterators so that we can get two memory references
3026 per insn group.
3028 In order to simplify things in the prologue and epilogue expanders,
3029 we use helper functions to fix up the memory references after the
3030 fact with the appropriate offsets to a POST_MODIFY memory mode.
3031 The following data structure tracks the state of the two iterators
3032 while insns are being emitted. */
3034 struct spill_fill_data
3036 rtx_insn *init_after; /* point at which to emit initializations */
3037 rtx init_reg[2]; /* initial base register */
3038 rtx iter_reg[2]; /* the iterator registers */
3039 rtx *prev_addr[2]; /* address of last memory use */
3040 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3041 HOST_WIDE_INT prev_off[2]; /* last offset */
3042 int n_iter; /* number of iterators in use */
3043 int next_iter; /* next iterator to use */
3044 unsigned int save_gr_used_mask;
3047 static struct spill_fill_data spill_fill_data;
3049 static void
3050 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3052 int i;
3054 spill_fill_data.init_after = get_last_insn ();
3055 spill_fill_data.init_reg[0] = init_reg;
3056 spill_fill_data.init_reg[1] = init_reg;
3057 spill_fill_data.prev_addr[0] = NULL;
3058 spill_fill_data.prev_addr[1] = NULL;
3059 spill_fill_data.prev_insn[0] = NULL;
3060 spill_fill_data.prev_insn[1] = NULL;
3061 spill_fill_data.prev_off[0] = cfa_off;
3062 spill_fill_data.prev_off[1] = cfa_off;
3063 spill_fill_data.next_iter = 0;
3064 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3066 spill_fill_data.n_iter = 1 + (n_spills > 2);
3067 for (i = 0; i < spill_fill_data.n_iter; ++i)
3069 int regno = next_scratch_gr_reg ();
3070 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3071 current_frame_info.gr_used_mask |= 1 << regno;
3075 static void
3076 finish_spill_pointers (void)
3078 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3081 static rtx
3082 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3084 int iter = spill_fill_data.next_iter;
3085 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3086 rtx disp_rtx = GEN_INT (disp);
3087 rtx mem;
3089 if (spill_fill_data.prev_addr[iter])
3091 if (satisfies_constraint_N (disp_rtx))
3093 *spill_fill_data.prev_addr[iter]
3094 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3095 gen_rtx_PLUS (DImode,
3096 spill_fill_data.iter_reg[iter],
3097 disp_rtx));
3098 add_reg_note (spill_fill_data.prev_insn[iter],
3099 REG_INC, spill_fill_data.iter_reg[iter]);
3101 else
3103 /* ??? Could use register post_modify for loads. */
3104 if (!satisfies_constraint_I (disp_rtx))
3106 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3107 emit_move_insn (tmp, disp_rtx);
3108 disp_rtx = tmp;
3110 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3111 spill_fill_data.iter_reg[iter], disp_rtx));
3114 /* Micro-optimization: if we've created a frame pointer, it's at
3115 CFA 0, which may allow the real iterator to be initialized lower,
3116 slightly increasing parallelism. Also, if there are few saves
3117 it may eliminate the iterator entirely. */
3118 else if (disp == 0
3119 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3120 && frame_pointer_needed)
3122 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3123 set_mem_alias_set (mem, get_varargs_alias_set ());
3124 return mem;
3126 else
3128 rtx seq;
3129 rtx_insn *insn;
3131 if (disp == 0)
3132 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3133 spill_fill_data.init_reg[iter]);
3134 else
3136 start_sequence ();
3138 if (!satisfies_constraint_I (disp_rtx))
3140 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3141 emit_move_insn (tmp, disp_rtx);
3142 disp_rtx = tmp;
3145 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3146 spill_fill_data.init_reg[iter],
3147 disp_rtx));
3149 seq = get_insns ();
3150 end_sequence ();
3153 /* Careful for being the first insn in a sequence. */
3154 if (spill_fill_data.init_after)
3155 insn = emit_insn_after (seq, spill_fill_data.init_after);
3156 else
3158 rtx_insn *first = get_insns ();
3159 if (first)
3160 insn = emit_insn_before (seq, first);
3161 else
3162 insn = emit_insn (seq);
3164 spill_fill_data.init_after = insn;
3167 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3169 /* ??? Not all of the spills are for varargs, but some of them are.
3170 The rest of the spills belong in an alias set of their own. But
3171 it doesn't actually hurt to include them here. */
3172 set_mem_alias_set (mem, get_varargs_alias_set ());
3174 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3175 spill_fill_data.prev_off[iter] = cfa_off;
3177 if (++iter >= spill_fill_data.n_iter)
3178 iter = 0;
3179 spill_fill_data.next_iter = iter;
3181 return mem;
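/* Illustration of the interleaving above: with more than two spills,
   setup_spill_pointers allocates two iterator registers and successive
   spill_restore_mem calls alternate between them.  For the usual 8-byte
   slots this means each iterator advances by 16 bytes, folded into its
   previous reference as a POST_MODIFY, so the two memory operations of
   each pair can issue in the same insn group.  */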
3184 static void
3185 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3186 rtx frame_reg)
3188 int iter = spill_fill_data.next_iter;
3189 rtx mem;
3190 rtx_insn *insn;
3192 mem = spill_restore_mem (reg, cfa_off);
3193 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3194 spill_fill_data.prev_insn[iter] = insn;
3196 if (frame_reg)
3198 rtx base;
3199 HOST_WIDE_INT off;
3201 RTX_FRAME_RELATED_P (insn) = 1;
3203 /* Don't even pretend that the unwind code can intuit its way
3204 through a pair of interleaved post_modify iterators. Just
3205 provide the correct answer. */
3207 if (frame_pointer_needed)
3209 base = hard_frame_pointer_rtx;
3210 off = - cfa_off;
3212 else
3214 base = stack_pointer_rtx;
3215 off = current_frame_info.total_size - cfa_off;
3218 add_reg_note (insn, REG_CFA_OFFSET,
3219 gen_rtx_SET (VOIDmode,
3220 gen_rtx_MEM (GET_MODE (reg),
3221 plus_constant (Pmode,
3222 base, off)),
3223 frame_reg));
3227 static void
3228 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3230 int iter = spill_fill_data.next_iter;
3231 rtx_insn *insn;
3233 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3234 GEN_INT (cfa_off)));
3235 spill_fill_data.prev_insn[iter] = insn;
3238 /* Wrapper functions that discard the CONST_INT spill offset. These
3239 exist so that we can give gr_spill/gr_fill the offset they need and
3240 use a consistent function interface. */
3242 static rtx
3243 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3245 return gen_movdi (dest, src);
3248 static rtx
3249 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3251 return gen_fr_spill (dest, src);
3254 static rtx
3255 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3257 return gen_fr_restore (dest, src);
3260 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3262 /* See Table 6.2 of the IA-64 Software Developer's Manual, Volume 2. */
3263 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
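/* Worked example for BACKING_STORE_SIZE (illustration only): with
   N = 96 stacked registers it gives (96 + 96/63 + 1) * 8
   = (96 + 1 + 1) * 8 = 784 bytes; the extra slots cover the NaT
   collection words that the RSE interleaves with the spilled registers,
   one per 63 registers plus one for a partial group.  */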
3265 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3266 inclusive. These are offsets from the current stack pointer. BS_SIZE
3267 is the size of the backing store. ??? This clobbers r2 and r3. */
3269 static void
3270 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3271 int bs_size)
3273 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3274 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3275 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3277 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3278 of the Register Stack Engine. We also need to probe it after checking
3279 that the 2 stacks don't overlap. */
3280 emit_insn (gen_bsp_value (r3));
3281 emit_move_insn (r2, GEN_INT (-(first + size)));
3283 /* Compare current value of BSP and SP registers. */
3284 emit_insn (gen_rtx_SET (VOIDmode, p6,
3285 gen_rtx_fmt_ee (LTU, BImode,
3286 r3, stack_pointer_rtx)));
3288 /* Compute the address of the probe for the Backing Store (which grows
3289 towards higher addresses). We probe only at the first offset of
2290 the next page because some OSes (e.g. Linux/ia64) only extend the
2291 backing store when this specific address is hit (but generate a SEGV
2292 on other addresses). Page size is the worst case (4KB). The reserve
3293 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3294 Also compute the address of the last probe for the memory stack
3295 (which grows towards lower addresses). */
3296 emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
3297 emit_insn (gen_rtx_SET (VOIDmode, r2,
3298 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3300 /* Compare them and raise SEGV if the former has topped the latter. */
3301 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3302 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3303 gen_rtx_SET (VOIDmode, p6,
3304 gen_rtx_fmt_ee (GEU, BImode,
3305 r3, r2))));
3306 emit_insn (gen_rtx_SET (VOIDmode,
3307 gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3308 const0_rtx),
3309 const0_rtx));
3310 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3311 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3312 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3313 GEN_INT (11))));
3315 /* Probe the Backing Store if necessary. */
3316 if (bs_size > 0)
3317 emit_stack_probe (r3);
3319 /* Probe the memory stack if necessary. */
3320 if (size == 0)
3323 /* See if we have a constant small number of probes to generate. If so,
3324 that's the easy case. */
3325 else if (size <= PROBE_INTERVAL)
3326 emit_stack_probe (r2);
3328 /* The run-time loop is made up of 8 insns in the generic case while this
3329 compile-time loop is made up of 5+2*(n-2) insns for n intervals. */
3330 else if (size <= 4 * PROBE_INTERVAL)
3332 HOST_WIDE_INT i;
3334 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3335 emit_insn (gen_rtx_SET (VOIDmode, r2,
3336 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3337 emit_stack_probe (r2);
3339 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3340 it exceeds SIZE. If only two probes are needed, this will not
3341 generate any code. Then probe at FIRST + SIZE. */
3342 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3344 emit_insn (gen_rtx_SET (VOIDmode, r2,
3345 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3346 emit_stack_probe (r2);
3349 emit_insn (gen_rtx_SET (VOIDmode, r2,
3350 plus_constant (Pmode, r2,
3351 (i - PROBE_INTERVAL) - size)));
3352 emit_stack_probe (r2);
3355 /* Otherwise, do the same as above, but in a loop. Note that we must be
3356 extra careful with variables wrapping around because we might be at
3357 the very top (or the very bottom) of the address space and we have
3358 to be able to handle this case properly; in particular, we use an
3359 equality test for the loop condition. */
3360 else
3362 HOST_WIDE_INT rounded_size;
3364 emit_move_insn (r2, GEN_INT (-first));
3367 /* Step 1: round SIZE to the previous multiple of the interval. */
3369 rounded_size = size & -PROBE_INTERVAL;
3372 /* Step 2: compute initial and final value of the loop counter. */
3374 /* TEST_ADDR = SP + FIRST. */
3375 emit_insn (gen_rtx_SET (VOIDmode, r2,
3376 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3378 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3379 if (rounded_size > (1 << 21))
3381 emit_move_insn (r3, GEN_INT (-rounded_size));
3382 emit_insn (gen_rtx_SET (VOIDmode, r3, gen_rtx_PLUS (Pmode, r2, r3)));
3384 else
3385 emit_insn (gen_rtx_SET (VOIDmode, r3,
3386 gen_rtx_PLUS (Pmode, r2,
3387 GEN_INT (-rounded_size))));
3390 /* Step 3: the loop
3392 while (TEST_ADDR != LAST_ADDR)
3394 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3395 probe at TEST_ADDR
3398 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3399 until it is equal to ROUNDED_SIZE. */
3401 emit_insn (gen_probe_stack_range (r2, r2, r3));
3404 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3405 that SIZE is equal to ROUNDED_SIZE. */
3407 /* TEMP = SIZE - ROUNDED_SIZE. */
3408 if (size != rounded_size)
3410 emit_insn (gen_rtx_SET (VOIDmode, r2,
3411 plus_constant (Pmode, r2,
3412 rounded_size - size)));
3413 emit_stack_probe (r2);
3417 /* Make sure nothing is scheduled before we are done. */
3418 emit_insn (gen_blockage ());
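/* Worked example for the probing logic above, assuming the usual 4KB
   PROBE_INTERVAL (STACK_CHECK_PROBE_INTERVAL_EXP == 12): a 12KB SIZE
   takes the "size <= 4 * PROBE_INTERVAL" branch and emits three probes,
   at FIRST + 4KB, FIRST + 8KB and FIRST + 12KB below the incoming stack
   pointer; anything larger falls through to the probe_stack_range
   loop.  */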
3421 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3422 absolute addresses. */
3424 const char *
3425 output_probe_stack_range (rtx reg1, rtx reg2)
3427 static int labelno = 0;
3428 char loop_lab[32], end_lab[32];
3429 rtx xops[3];
3431 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3432 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3434 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3436 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
3437 xops[0] = reg1;
3438 xops[1] = reg2;
3439 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3440 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3441 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3442 assemble_name_raw (asm_out_file, end_lab);
3443 fputc ('\n', asm_out_file);
3445 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3446 xops[1] = GEN_INT (-PROBE_INTERVAL);
3447 output_asm_insn ("addl %0 = %1, %0", xops);
3448 fputs ("\t;;\n", asm_out_file);
3450 /* Probe at TEST_ADDR and branch. */
3451 output_asm_insn ("probe.w.fault %0, 0", xops);
3452 fprintf (asm_out_file, "\tbr ");
3453 assemble_name_raw (asm_out_file, loop_lab);
3454 fputc ('\n', asm_out_file);
3456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3458 return "";
3461 /* Called after register allocation to add any instructions needed for the
3462 prologue. Using a prologue insn is preferred to putting all of the
3463 instructions in output_function_prologue(), since it allows the scheduler
3464 to intermix instructions with the saves of the caller saved registers. In
3465 some cases, it might be necessary to emit a barrier instruction as the last
3466 insn to prevent such scheduling.
3468 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3469 so that the debug info generation code can handle them properly.
3471 The register save area is laid out like so:
3472 cfa+16
3473 [ varargs spill area ]
3474 [ fr register spill area ]
3475 [ br register spill area ]
3476 [ ar register spill area ]
3477 [ pr register spill area ]
3478 [ gr register spill area ] */
3480 /* ??? Get inefficient code when the frame size is larger than can fit in an
3481 adds instruction. */
3483 void
3484 ia64_expand_prologue (void)
3486 rtx_insn *insn;
3487 rtx ar_pfs_save_reg, ar_unat_save_reg;
3488 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3489 rtx reg, alt_reg;
3491 ia64_compute_frame_size (get_frame_size ());
3492 last_scratch_gr_reg = 15;
3494 if (flag_stack_usage_info)
3495 current_function_static_stack_size = current_frame_info.total_size;
3497 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3499 HOST_WIDE_INT size = current_frame_info.total_size;
3500 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3501 + current_frame_info.n_local_regs);
3503 if (crtl->is_leaf && !cfun->calls_alloca)
3505 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3506 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3507 size - STACK_CHECK_PROTECT,
3508 bs_size);
3509 else if (size + bs_size > STACK_CHECK_PROTECT)
3510 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3512 else if (size + bs_size > 0)
3513 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3516 if (dump_file)
3518 fprintf (dump_file, "ia64 frame related registers "
3519 "recorded in current_frame_info.r[]:\n");
3520 #define PRINTREG(a) if (current_frame_info.r[a]) \
3521 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3522 PRINTREG(reg_fp);
3523 PRINTREG(reg_save_b0);
3524 PRINTREG(reg_save_pr);
3525 PRINTREG(reg_save_ar_pfs);
3526 PRINTREG(reg_save_ar_unat);
3527 PRINTREG(reg_save_ar_lc);
3528 PRINTREG(reg_save_gp);
3529 #undef PRINTREG
3532 /* If there is no epilogue, then we don't need some prologue insns.
3533 We need to avoid emitting the dead prologue insns, because flow
3534 will complain about them. */
3535 if (optimize)
3537 edge e;
3538 edge_iterator ei;
3540 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3541 if ((e->flags & EDGE_FAKE) == 0
3542 && (e->flags & EDGE_FALLTHRU) != 0)
3543 break;
3544 epilogue_p = (e != NULL);
3546 else
3547 epilogue_p = 1;
3549 /* Set the local, input, and output register names. We need to do this
3550 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3551 half. If we use in/loc/out register names, then we get assembler errors
3552 in crtn.S because there is no alloc insn or regstk directive in there. */
3553 if (! TARGET_REG_NAMES)
3555 int inputs = current_frame_info.n_input_regs;
3556 int locals = current_frame_info.n_local_regs;
3557 int outputs = current_frame_info.n_output_regs;
3559 for (i = 0; i < inputs; i++)
3560 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3561 for (i = 0; i < locals; i++)
3562 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3563 for (i = 0; i < outputs; i++)
3564 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3567 /* Set the frame pointer register name. The regnum is logically loc79,
3568 but of course we'll not have allocated that many locals. Rather than
3569 worrying about renumbering the existing rtxs, we adjust the name. */
3570 /* ??? This code means that we can never use one local register when
3571 there is a frame pointer. loc79 gets wasted in this case, as it is
3572 renamed to a register that will never be used. See also the try_locals
3573 code in find_gr_spill. */
3574 if (current_frame_info.r[reg_fp])
3576 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3577 reg_names[HARD_FRAME_POINTER_REGNUM]
3578 = reg_names[current_frame_info.r[reg_fp]];
3579 reg_names[current_frame_info.r[reg_fp]] = tmp;
3582 /* We don't need an alloc instruction if we've used no outputs or locals. */
3583 if (current_frame_info.n_local_regs == 0
3584 && current_frame_info.n_output_regs == 0
3585 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3586 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3588 /* If there is no alloc, but there are input registers used, then we
3589 need a .regstk directive. */
3590 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3591 ar_pfs_save_reg = NULL_RTX;
3593 else
3595 current_frame_info.need_regstk = 0;
3597 if (current_frame_info.r[reg_save_ar_pfs])
3599 regno = current_frame_info.r[reg_save_ar_pfs];
3600 reg_emitted (reg_save_ar_pfs);
3602 else
3603 regno = next_scratch_gr_reg ();
3604 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3606 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3607 GEN_INT (current_frame_info.n_input_regs),
3608 GEN_INT (current_frame_info.n_local_regs),
3609 GEN_INT (current_frame_info.n_output_regs),
3610 GEN_INT (current_frame_info.n_rotate_regs)));
3611 if (current_frame_info.r[reg_save_ar_pfs])
3613 RTX_FRAME_RELATED_P (insn) = 1;
3614 add_reg_note (insn, REG_CFA_REGISTER,
3615 gen_rtx_SET (VOIDmode,
3616 ar_pfs_save_reg,
3617 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3621 /* Set up frame pointer, stack pointer, and spill iterators. */
3623 n_varargs = cfun->machine->n_varargs;
3624 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3625 stack_pointer_rtx, 0);
3627 if (frame_pointer_needed)
3629 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3630 RTX_FRAME_RELATED_P (insn) = 1;
3632 /* Force the unwind info to recognize this as defining a new CFA,
3633 rather than some temp register setup. */
3634 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3637 if (current_frame_info.total_size != 0)
3639 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3640 rtx offset;
3642 if (satisfies_constraint_I (frame_size_rtx))
3643 offset = frame_size_rtx;
3644 else
3646 regno = next_scratch_gr_reg ();
3647 offset = gen_rtx_REG (DImode, regno);
3648 emit_move_insn (offset, frame_size_rtx);
3651 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3652 stack_pointer_rtx, offset));
3654 if (! frame_pointer_needed)
3656 RTX_FRAME_RELATED_P (insn) = 1;
3657 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3658 gen_rtx_SET (VOIDmode,
3659 stack_pointer_rtx,
3660 gen_rtx_PLUS (DImode,
3661 stack_pointer_rtx,
3662 frame_size_rtx)));
3665 /* ??? At this point we must generate a magic insn that appears to
3666 modify the stack pointer, the frame pointer, and all spill
3667 iterators. This would allow the most scheduling freedom. For
3668 now, just hard stop. */
3669 emit_insn (gen_blockage ());
3672 /* Must copy out ar.unat before doing any integer spills. */
3673 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3675 if (current_frame_info.r[reg_save_ar_unat])
3677 ar_unat_save_reg
3678 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3679 reg_emitted (reg_save_ar_unat);
3681 else
3683 alt_regno = next_scratch_gr_reg ();
3684 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3685 current_frame_info.gr_used_mask |= 1 << alt_regno;
3688 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3689 insn = emit_move_insn (ar_unat_save_reg, reg);
3690 if (current_frame_info.r[reg_save_ar_unat])
3692 RTX_FRAME_RELATED_P (insn) = 1;
3693 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3696 /* Even if we're not going to generate an epilogue, we still
3697 need to save the register so that EH works. */
3698 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3699 emit_insn (gen_prologue_use (ar_unat_save_reg));
3701 else
3702 ar_unat_save_reg = NULL_RTX;
3704 /* Spill all varargs registers. Do this before spilling any GR registers,
3705 since we want the UNAT bits for the GR registers to override the UNAT
3706 bits from varargs, which we don't care about. */
3708 cfa_off = -16;
3709 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3711 reg = gen_rtx_REG (DImode, regno);
3712 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3715 /* Locate the bottom of the register save area. */
3716 cfa_off = (current_frame_info.spill_cfa_off
3717 + current_frame_info.spill_size
3718 + current_frame_info.extra_spill_size);
3720 /* Save the predicate register block either in a register or in memory. */
3721 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3723 reg = gen_rtx_REG (DImode, PR_REG (0));
3724 if (current_frame_info.r[reg_save_pr] != 0)
3726 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3727 reg_emitted (reg_save_pr);
3728 insn = emit_move_insn (alt_reg, reg);
3730 /* ??? Denote pr spill/fill by a DImode move that modifies all
3731 64 hard registers. */
3732 RTX_FRAME_RELATED_P (insn) = 1;
3733 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3735 /* Even if we're not going to generate an epilogue, we still
3736 need to save the register so that EH works. */
3737 if (! epilogue_p)
3738 emit_insn (gen_prologue_use (alt_reg));
3740 else
3742 alt_regno = next_scratch_gr_reg ();
3743 alt_reg = gen_rtx_REG (DImode, alt_regno);
3744 insn = emit_move_insn (alt_reg, reg);
3745 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3746 cfa_off -= 8;
3750 /* Handle AR regs in numerical order. All of them get special handling. */
3751 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3752 && current_frame_info.r[reg_save_ar_unat] == 0)
3754 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3755 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3756 cfa_off -= 8;
3759 /* The alloc insn already copied ar.pfs into a general register. The
3760 only thing we have to do now is copy that register to a stack slot
3761 if we'd not allocated a local register for the job. */
3762 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3763 && current_frame_info.r[reg_save_ar_pfs] == 0)
3765 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3766 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3767 cfa_off -= 8;
3770 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3772 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3773 if (current_frame_info.r[reg_save_ar_lc] != 0)
3775 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3776 reg_emitted (reg_save_ar_lc);
3777 insn = emit_move_insn (alt_reg, reg);
3778 RTX_FRAME_RELATED_P (insn) = 1;
3779 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3781 /* Even if we're not going to generate an epilogue, we still
3782 need to save the register so that EH works. */
3783 if (! epilogue_p)
3784 emit_insn (gen_prologue_use (alt_reg));
3786 else
3788 alt_regno = next_scratch_gr_reg ();
3789 alt_reg = gen_rtx_REG (DImode, alt_regno);
3790 emit_move_insn (alt_reg, reg);
3791 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3792 cfa_off -= 8;
3796 /* Save the return pointer. */
3797 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3799 reg = gen_rtx_REG (DImode, BR_REG (0));
3800 if (current_frame_info.r[reg_save_b0] != 0)
3802 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3803 reg_emitted (reg_save_b0);
3804 insn = emit_move_insn (alt_reg, reg);
3805 RTX_FRAME_RELATED_P (insn) = 1;
3806 add_reg_note (insn, REG_CFA_REGISTER,
3807 gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
3809 /* Even if we're not going to generate an epilogue, we still
3810 need to save the register so that EH works. */
3811 if (! epilogue_p)
3812 emit_insn (gen_prologue_use (alt_reg));
3814 else
3816 alt_regno = next_scratch_gr_reg ();
3817 alt_reg = gen_rtx_REG (DImode, alt_regno);
3818 emit_move_insn (alt_reg, reg);
3819 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3820 cfa_off -= 8;
3824 if (current_frame_info.r[reg_save_gp])
3826 reg_emitted (reg_save_gp);
3827 insn = emit_move_insn (gen_rtx_REG (DImode,
3828 current_frame_info.r[reg_save_gp]),
3829 pic_offset_table_rtx);
3832 /* We should now be at the base of the gr/br/fr spill area. */
3833 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3834 + current_frame_info.spill_size));
3836 /* Spill all general registers. */
3837 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3838 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3840 reg = gen_rtx_REG (DImode, regno);
3841 do_spill (gen_gr_spill, reg, cfa_off, reg);
3842 cfa_off -= 8;
3845 /* Spill the rest of the BR registers. */
3846 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3847 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3849 alt_regno = next_scratch_gr_reg ();
3850 alt_reg = gen_rtx_REG (DImode, alt_regno);
3851 reg = gen_rtx_REG (DImode, regno);
3852 emit_move_insn (alt_reg, reg);
3853 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3854 cfa_off -= 8;
3857 /* Align the frame and spill all FR registers. */
3858 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3859 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3861 gcc_assert (!(cfa_off & 15));
3862 reg = gen_rtx_REG (XFmode, regno);
3863 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3864 cfa_off -= 16;
3867 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3869 finish_spill_pointers ();
3872 /* Output the textual info surrounding the prologue. */
3874 void
3875 ia64_start_function (FILE *file, const char *fnname,
3876 tree decl ATTRIBUTE_UNUSED)
3878 #if TARGET_ABI_OPEN_VMS
3879 vms_start_function (fnname);
3880 #endif
3882 fputs ("\t.proc ", file);
3883 assemble_name (file, fnname);
3884 fputc ('\n', file);
3885 ASM_OUTPUT_LABEL (file, fnname);
3888 /* Called after register allocation to add any instructions needed for the
3889 epilogue. Using an epilogue insn is favored compared to putting all of the
3890 instructions in output_function_epilogue(), since it allows the scheduler
3891 to intermix instructions with the restores of the caller-saved registers. In
3892 some cases, it might be necessary to emit a barrier instruction as the last
3893 insn to prevent such scheduling. */
3895 void
3896 ia64_expand_epilogue (int sibcall_p)
3898 rtx_insn *insn;
3899 rtx reg, alt_reg, ar_unat_save_reg;
3900 int regno, alt_regno, cfa_off;
3902 ia64_compute_frame_size (get_frame_size ());
3904 /* If there is a frame pointer, then we use it instead of the stack
3905 pointer, so that the stack pointer does not need to be valid when
3906 the epilogue starts. See EXIT_IGNORE_STACK. */
3907 if (frame_pointer_needed)
3908 setup_spill_pointers (current_frame_info.n_spilled,
3909 hard_frame_pointer_rtx, 0);
3910 else
3911 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3912 current_frame_info.total_size);
3914 if (current_frame_info.total_size != 0)
3916 /* ??? At this point we must generate a magic insn that appears to
3917 modify the spill iterators and the frame pointer. This would
3918 allow the most scheduling freedom. For now, just hard stop. */
3919 emit_insn (gen_blockage ());
3922 /* Locate the bottom of the register save area. */
3923 cfa_off = (current_frame_info.spill_cfa_off
3924 + current_frame_info.spill_size
3925 + current_frame_info.extra_spill_size);
3927 /* Restore the predicate registers. */
3928 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3930 if (current_frame_info.r[reg_save_pr] != 0)
3932 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3933 reg_emitted (reg_save_pr);
3935 else
3937 alt_regno = next_scratch_gr_reg ();
3938 alt_reg = gen_rtx_REG (DImode, alt_regno);
3939 do_restore (gen_movdi_x, alt_reg, cfa_off);
3940 cfa_off -= 8;
3942 reg = gen_rtx_REG (DImode, PR_REG (0));
3943 emit_move_insn (reg, alt_reg);
3946 /* Restore the application registers. */
3948 /* Load the saved unat from the stack, but do not restore it until
3949 after the GRs have been restored. */
3950 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3952 if (current_frame_info.r[reg_save_ar_unat] != 0)
3954 ar_unat_save_reg
3955 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3956 reg_emitted (reg_save_ar_unat);
3958 else
3960 alt_regno = next_scratch_gr_reg ();
3961 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3962 current_frame_info.gr_used_mask |= 1 << alt_regno;
3963 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3964 cfa_off -= 8;
3967 else
3968 ar_unat_save_reg = NULL_RTX;
3970 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3972 reg_emitted (reg_save_ar_pfs);
3973 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3974 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3975 emit_move_insn (reg, alt_reg);
3977 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3979 alt_regno = next_scratch_gr_reg ();
3980 alt_reg = gen_rtx_REG (DImode, alt_regno);
3981 do_restore (gen_movdi_x, alt_reg, cfa_off);
3982 cfa_off -= 8;
3983 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3984 emit_move_insn (reg, alt_reg);
3987 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3989 if (current_frame_info.r[reg_save_ar_lc] != 0)
3991 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3992 reg_emitted (reg_save_ar_lc);
3994 else
3996 alt_regno = next_scratch_gr_reg ();
3997 alt_reg = gen_rtx_REG (DImode, alt_regno);
3998 do_restore (gen_movdi_x, alt_reg, cfa_off);
3999 cfa_off -= 8;
4001 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4002 emit_move_insn (reg, alt_reg);
4005 /* Restore the return pointer. */
4006 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4008 if (current_frame_info.r[reg_save_b0] != 0)
4010 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4011 reg_emitted (reg_save_b0);
4013 else
4015 alt_regno = next_scratch_gr_reg ();
4016 alt_reg = gen_rtx_REG (DImode, alt_regno);
4017 do_restore (gen_movdi_x, alt_reg, cfa_off);
4018 cfa_off -= 8;
4020 reg = gen_rtx_REG (DImode, BR_REG (0));
4021 emit_move_insn (reg, alt_reg);
4024 /* We should now be at the base of the gr/br/fr spill area. */
4025 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4026 + current_frame_info.spill_size));
4028 /* The GP may be stored on the stack in the prologue, but it's
4029 never restored in the epilogue. Skip the stack slot. */
4030 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4031 cfa_off -= 8;
4033 /* Restore all general registers. */
4034 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4035 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4037 reg = gen_rtx_REG (DImode, regno);
4038 do_restore (gen_gr_restore, reg, cfa_off);
4039 cfa_off -= 8;
4042 /* Restore the branch registers. */
4043 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4044 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4046 alt_regno = next_scratch_gr_reg ();
4047 alt_reg = gen_rtx_REG (DImode, alt_regno);
4048 do_restore (gen_movdi_x, alt_reg, cfa_off);
4049 cfa_off -= 8;
4050 reg = gen_rtx_REG (DImode, regno);
4051 emit_move_insn (reg, alt_reg);
4054 /* Restore floating point registers. */
4055 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4056 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4058 gcc_assert (!(cfa_off & 15));
4059 reg = gen_rtx_REG (XFmode, regno);
4060 do_restore (gen_fr_restore_x, reg, cfa_off);
4061 cfa_off -= 16;
4064 /* Restore ar.unat for real. */
4065 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4067 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4068 emit_move_insn (reg, ar_unat_save_reg);
4071 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4073 finish_spill_pointers ();
4075 if (current_frame_info.total_size
4076 || cfun->machine->ia64_eh_epilogue_sp
4077 || frame_pointer_needed)
4079 /* ??? At this point we must generate a magic insn that appears to
4080 modify the spill iterators, the stack pointer, and the frame
4081 pointer. This would allow the most scheduling freedom. For now,
4082 just hard stop. */
4083 emit_insn (gen_blockage ());
4086 if (cfun->machine->ia64_eh_epilogue_sp)
4087 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4088 else if (frame_pointer_needed)
4090 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4091 RTX_FRAME_RELATED_P (insn) = 1;
4092 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4094 else if (current_frame_info.total_size)
4096 rtx offset, frame_size_rtx;
4098 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4099 if (satisfies_constraint_I (frame_size_rtx))
4100 offset = frame_size_rtx;
4101 else
4103 regno = next_scratch_gr_reg ();
4104 offset = gen_rtx_REG (DImode, regno);
4105 emit_move_insn (offset, frame_size_rtx);
4108 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4109 offset));
4111 RTX_FRAME_RELATED_P (insn) = 1;
4112 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4113 gen_rtx_SET (VOIDmode,
4114 stack_pointer_rtx,
4115 gen_rtx_PLUS (DImode,
4116 stack_pointer_rtx,
4117 frame_size_rtx)));
4120 if (cfun->machine->ia64_eh_epilogue_bsp)
4121 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4123 if (! sibcall_p)
4124 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4125 else
4127 int fp = GR_REG (2);
4128 /* We need a throwaway register here; r0 and r1 are reserved,
4129 so r2 is the first available call-clobbered register. If
4130 there was a frame_pointer register, we may have swapped the
4131 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4132 sure we're using the string "r2" when emitting the register
4133 name for the assembler. */
4134 if (current_frame_info.r[reg_fp]
4135 && current_frame_info.r[reg_fp] == GR_REG (2))
4136 fp = HARD_FRAME_POINTER_REGNUM;
4138 /* We must emit an alloc to force the input registers to become output
4139 registers. Otherwise, if the callee tries to pass its parameters
4140 through to another call without an intervening alloc, then these
4141 values get lost. */
4142 /* ??? We don't need to preserve all input registers. We only need to
4143 preserve those input registers used as arguments to the sibling call.
4144 It is unclear how to compute that number here. */
4145 if (current_frame_info.n_input_regs != 0)
4147 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4149 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4150 const0_rtx, const0_rtx,
4151 n_inputs, const0_rtx));
4152 RTX_FRAME_RELATED_P (insn) = 1;
4154 /* ??? We need to mark the alloc as frame-related so that it gets
4155 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4156 But there's nothing dwarf2 related to be done wrt the register
4157 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4158 the empty parallel means dwarf2out will not see anything. */
4159 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4160 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4165 /* Return 1 if br.ret can do all the work required to return from a
4166 function. */
4168 int
4169 ia64_direct_return (void)
4171 if (reload_completed && ! frame_pointer_needed)
4173 ia64_compute_frame_size (get_frame_size ());
4175 return (current_frame_info.total_size == 0
4176 && current_frame_info.n_spilled == 0
4177 && current_frame_info.r[reg_save_b0] == 0
4178 && current_frame_info.r[reg_save_pr] == 0
4179 && current_frame_info.r[reg_save_ar_pfs] == 0
4180 && current_frame_info.r[reg_save_ar_unat] == 0
4181 && current_frame_info.r[reg_save_ar_lc] == 0);
4183 return 0;
4186 /* Return the magic cookie that we use to hold the return address
4187 during early compilation. */
4189 rtx
4190 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4192 if (count != 0)
4193 return NULL;
4194 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4197 /* Split this value after reload, now that we know where the return
4198 address is saved. */
4200 void
4201 ia64_split_return_addr_rtx (rtx dest)
4203 rtx src;
4205 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4207 if (current_frame_info.r[reg_save_b0] != 0)
4209 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4210 reg_emitted (reg_save_b0);
4212 else
4214 HOST_WIDE_INT off;
4215 unsigned int regno;
4216 rtx off_r;
4218 /* Compute offset from CFA for BR0. */
4219 /* ??? Must be kept in sync with ia64_expand_prologue. */
4220 off = (current_frame_info.spill_cfa_off
4221 + current_frame_info.spill_size);
4222 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4223 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4224 off -= 8;
4226 /* Convert CFA offset to a register based offset. */
4227 if (frame_pointer_needed)
4228 src = hard_frame_pointer_rtx;
4229 else
4231 src = stack_pointer_rtx;
4232 off += current_frame_info.total_size;
4235 /* Load address into scratch register. */
4236 off_r = GEN_INT (off);
4237 if (satisfies_constraint_I (off_r))
4238 emit_insn (gen_adddi3 (dest, src, off_r));
4239 else
4241 emit_move_insn (dest, off_r);
4242 emit_insn (gen_adddi3 (dest, src, dest));
4245 src = gen_rtx_MEM (Pmode, dest);
4248 else
4249 src = gen_rtx_REG (DImode, BR_REG (0));
4251 emit_move_insn (dest, src);
4254 int
4255 ia64_hard_regno_rename_ok (int from, int to)
4257 /* Don't clobber any of the registers we reserved for the prologue. */
4258 unsigned int r;
4260 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4261 if (to == current_frame_info.r[r]
4262 || from == current_frame_info.r[r]
4263 || to == emitted_frame_related_regs[r]
4264 || from == emitted_frame_related_regs[r])
4265 return 0;
4267 /* Don't use output registers outside the register frame. */
4268 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4269 return 0;
4271 /* Retain even/oddness on predicate register pairs. */
4272 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4273 return (from & 1) == (to & 1);
4275 return 1;
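/* For example, renaming p6 to p8 is allowed (parity preserved), while
   renaming p6 to p7 is rejected, keeping intact the even/odd pairing of
   predicate registers that the port relies on elsewhere (see the %J/%j
   operand codes further below).  */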
4278 /* Target hook for assembling integer objects. Handle word-sized
4279 aligned objects and detect the cases when @fptr is needed. */
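/* For example: a static initializer that takes the address of a function
   must go through an official function descriptor, since ia64 code
   addresses are really (entry, gp) pairs.  As a sketch, for a hypothetical
   translation unit such as

	extern void foo (void);
	void (*fp) (void) = foo;

   this hook emits roughly

	data8	@fptr(foo)

   (the aligned, 64-bit case from the table below), so the linker
   materializes a descriptor for foo instead of storing a bare code
   address.  */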
4281 static bool
4282 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4284 if (size == POINTER_SIZE / BITS_PER_UNIT
4285 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4286 && GET_CODE (x) == SYMBOL_REF
4287 && SYMBOL_REF_FUNCTION_P (x))
4289 static const char * const directive[2][2] = {
4290 /* 64-bit pointer */ /* 32-bit pointer */
4291 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4292 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4294 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4295 output_addr_const (asm_out_file, x);
4296 fputs (")\n", asm_out_file);
4297 return true;
4299 return default_assemble_integer (x, size, aligned_p);
4302 /* Emit the function prologue. */
4304 static void
4305 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4307 int mask, grsave, grsave_prev;
4309 if (current_frame_info.need_regstk)
4310 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4311 current_frame_info.n_input_regs,
4312 current_frame_info.n_local_regs,
4313 current_frame_info.n_output_regs,
4314 current_frame_info.n_rotate_regs);
4316 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4317 return;
4319 /* Emit the .prologue directive. */
4321 mask = 0;
4322 grsave = grsave_prev = 0;
4323 if (current_frame_info.r[reg_save_b0] != 0)
4325 mask |= 8;
4326 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4328 if (current_frame_info.r[reg_save_ar_pfs] != 0
4329 && (grsave_prev == 0
4330 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4332 mask |= 4;
4333 if (grsave_prev == 0)
4334 grsave = current_frame_info.r[reg_save_ar_pfs];
4335 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4337 if (current_frame_info.r[reg_fp] != 0
4338 && (grsave_prev == 0
4339 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4341 mask |= 2;
4342 if (grsave_prev == 0)
4343 grsave = HARD_FRAME_POINTER_REGNUM;
4344 grsave_prev = current_frame_info.r[reg_fp];
4346 if (current_frame_info.r[reg_save_pr] != 0
4347 && (grsave_prev == 0
4348 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4350 mask |= 1;
4351 if (grsave_prev == 0)
4352 grsave = current_frame_info.r[reg_save_pr];
4355 if (mask && TARGET_GNU_AS)
4356 fprintf (file, "\t.prologue %d, %d\n", mask,
4357 ia64_dbx_register_number (grsave));
4358 else
4359 fputs ("\t.prologue\n", file);
4361 /* Emit a .spill directive, if necessary, to relocate the base of
4362 the register spill area. */
4363 if (current_frame_info.spill_cfa_off != -16)
4364 fprintf (file, "\t.spill %ld\n",
4365 (long) (current_frame_info.spill_cfa_off
4366 + current_frame_info.spill_size));
4369 /* Emit the .body directive at the scheduled end of the prologue. */
4371 static void
4372 ia64_output_function_end_prologue (FILE *file)
4374 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4375 return;
4377 fputs ("\t.body\n", file);
4380 /* Emit the function epilogue. */
4382 static void
4383 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4384 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4386 int i;
4388 if (current_frame_info.r[reg_fp])
4390 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4391 reg_names[HARD_FRAME_POINTER_REGNUM]
4392 = reg_names[current_frame_info.r[reg_fp]];
4393 reg_names[current_frame_info.r[reg_fp]] = tmp;
4394 reg_emitted (reg_fp);
4396 if (! TARGET_REG_NAMES)
4398 for (i = 0; i < current_frame_info.n_input_regs; i++)
4399 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4400 for (i = 0; i < current_frame_info.n_local_regs; i++)
4401 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4402 for (i = 0; i < current_frame_info.n_output_regs; i++)
4403 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4406 current_frame_info.initialized = 0;
4409 int
4410 ia64_dbx_register_number (int regno)
4412 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4413 from its home at loc79 to something inside the register frame. We
4414 must perform the same renumbering here for the debug info. */
4415 if (current_frame_info.r[reg_fp])
4417 if (regno == HARD_FRAME_POINTER_REGNUM)
4418 regno = current_frame_info.r[reg_fp];
4419 else if (regno == current_frame_info.r[reg_fp])
4420 regno = HARD_FRAME_POINTER_REGNUM;
4423 if (IN_REGNO_P (regno))
4424 return 32 + regno - IN_REG (0);
4425 else if (LOC_REGNO_P (regno))
4426 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4427 else if (OUT_REGNO_P (regno))
4428 return (32 + current_frame_info.n_input_regs
4429 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4430 else
4431 return regno;
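/* A worked example of the mapping above (illustrative only): for a frame
   with 2 input and 3 local registers,

	in0  -> 32   in1  -> 33
	loc0 -> 34   loc1 -> 35   loc2 -> 36
	out0 -> 37   out1 -> 38   ...

   i.e. the debug register numbers describe the stacked frame in its
   in/loc/out order starting at 32, independent of the hard register
   numbers GCC uses internally for IN_REG/LOC_REG/OUT_REG.  */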
4434 /* Implement TARGET_TRAMPOLINE_INIT.
4436 The trampoline should set the static chain pointer to value placed
4437 into the trampoline and should branch to the specified routine.
4438 To make the normal indirect-subroutine calling convention work,
4439 the trampoline must look like a function descriptor; the first
4440 word being the target address and the second being the target's
4441 global pointer.
4443 We abuse the concept of a global pointer by arranging for it
4444 to point to the data we need to load. The complete trampoline
4445 has the following form:
4447 +-------------------+ \
4448 TRAMP: | __ia64_trampoline | |
4449 +-------------------+ > fake function descriptor
4450 | TRAMP+16 | |
4451 +-------------------+ /
4452 | target descriptor |
4453 +-------------------+
4454 | static link |
4455 +-------------------+
4458 static void
4459 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4461 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4462 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4464 /* The Intel assembler requires that the global __ia64_trampoline symbol
4465 be declared explicitly. */
4466 if (!TARGET_GNU_AS)
4468 static bool declared_ia64_trampoline = false;
4470 if (!declared_ia64_trampoline)
4472 declared_ia64_trampoline = true;
4473 (*targetm.asm_out.globalize_label) (asm_out_file,
4474 "__ia64_trampoline");
4478 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4479 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4480 fnaddr = convert_memory_address (Pmode, fnaddr);
4481 static_chain = convert_memory_address (Pmode, static_chain);
4483 /* Load up our iterator. */
4484 addr_reg = copy_to_reg (addr);
4485 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4487 /* The first two words are the fake descriptor:
4488 __ia64_trampoline, ADDR+16. */
4489 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4490 if (TARGET_ABI_OPEN_VMS)
4492 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4493 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4494 relocation against function symbols to make it identical to the
4495 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4496 strict ELF and dereference to get the bare code address. */
4497 rtx reg = gen_reg_rtx (Pmode);
4498 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4499 emit_move_insn (reg, tramp);
4500 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4501 tramp = reg;
4503 emit_move_insn (m_tramp, tramp);
4504 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4505 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4507 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4508 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4509 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4511 /* The third word is the target descriptor. */
4512 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4513 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4514 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4516 /* The fourth word is the static chain. */
4517 emit_move_insn (m_tramp, static_chain);
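/* To make the layout concrete (a sketch with placeholder names, not tied
   to real addresses): after the code above runs for a nested function
   NESTED with static chain value CHAIN, the trampoline block holds four
   8-byte words:

	TRAMP+ 0:  address of __ia64_trampoline
	TRAMP+ 8:  TRAMP+16
	TRAMP+16:  address of NESTED's function descriptor
	TRAMP+24:  CHAIN

   Calling through TRAMP treats the first two words as a fake descriptor,
   and __ia64_trampoline then picks up the real target and the static
   chain from the last two.  */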
4520 /* Do any needed setup for a variadic function. CUM has not been updated
4521 for the last named argument which has type TYPE and mode MODE.
4523 We generate the actual spill instructions during prologue generation. */
4525 static void
4526 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4527 tree type, int * pretend_size,
4528 int second_time ATTRIBUTE_UNUSED)
4530 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4532 /* Skip the current argument. */
4533 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4535 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4537 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4538 *pretend_size = n * UNITS_PER_WORD;
4539 cfun->machine->n_varargs = n;
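/* For instance (illustrative numbers): given a hypothetical prototype

	int f (const char *fmt, ...);

   the single named argument uses one of the eight argument slots, so
   n = 8 - 1 = 7; *pretend_size becomes 7 * UNITS_PER_WORD = 56 bytes,
   and the prologue later spills the seven unnamed argument registers
   into that area (see the varargs spill loop in ia64_expand_prologue).  */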
4543 /* Check whether TYPE is a homogeneous floating point aggregate. If
4544 it is, return the mode of the floating point type that appears
4545 in all leaves. If it is not, return VOIDmode.
4547 An aggregate is a homogeneous floating point aggregate if all
4548 fields/elements in it have the same floating point type (e.g.,
4549 SFmode). 128-bit quad-precision floats are excluded.
4551 Variable sized aggregates should never arrive here, since we should
4552 have already decided to pass them by reference. Top-level zero-sized
4553 aggregates are excluded because our parallels crash the middle-end. */
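/* A few examples of the classification (illustrative only):

	struct p { double x, y; };		-> DFmode
	struct q { float v[4]; };		-> SFmode
	_Complex double				-> DFmode
	struct r { double x; float y; };	-> VOIDmode (mixed modes)

   while any 128-bit quad-precision (TFmode) leaf disqualifies the
   aggregate, per the exclusion above.  */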
4555 static machine_mode
4556 hfa_element_mode (const_tree type, bool nested)
4558 machine_mode element_mode = VOIDmode;
4559 machine_mode mode;
4560 enum tree_code code = TREE_CODE (type);
4561 int know_element_mode = 0;
4562 tree t;
4564 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4565 return VOIDmode;
4567 switch (code)
4569 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4570 case BOOLEAN_TYPE: case POINTER_TYPE:
4571 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4572 case LANG_TYPE: case FUNCTION_TYPE:
4573 return VOIDmode;
4575 /* Fortran complex types are supposed to be HFAs, so we need to handle
4576 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4577 types though. */
4578 case COMPLEX_TYPE:
4579 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4580 && TYPE_MODE (type) != TCmode)
4581 return GET_MODE_INNER (TYPE_MODE (type));
4582 else
4583 return VOIDmode;
4585 case REAL_TYPE:
4586 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4587 mode if this is contained within an aggregate. */
4588 if (nested && TYPE_MODE (type) != TFmode)
4589 return TYPE_MODE (type);
4590 else
4591 return VOIDmode;
4593 case ARRAY_TYPE:
4594 return hfa_element_mode (TREE_TYPE (type), 1);
4596 case RECORD_TYPE:
4597 case UNION_TYPE:
4598 case QUAL_UNION_TYPE:
4599 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4601 if (TREE_CODE (t) != FIELD_DECL)
4602 continue;
4604 mode = hfa_element_mode (TREE_TYPE (t), 1);
4605 if (know_element_mode)
4607 if (mode != element_mode)
4608 return VOIDmode;
4610 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4611 return VOIDmode;
4612 else
4614 know_element_mode = 1;
4615 element_mode = mode;
4618 return element_mode;
4620 default:
4621 /* If we reach here, we probably have some front-end specific type
4622 that the backend doesn't know about. This can happen via the
4623 aggregate_value_p call in init_function_start. All we can do is
4624 ignore unknown tree types. */
4625 return VOIDmode;
4628 return VOIDmode;
4631 /* Return the number of words required to hold a quantity of TYPE and MODE
4632 when passed as an argument. */
4633 static int
4634 ia64_function_arg_words (const_tree type, machine_mode mode)
4636 int words;
4638 if (mode == BLKmode)
4639 words = int_size_in_bytes (type);
4640 else
4641 words = GET_MODE_SIZE (mode);
4643 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
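/* E.g. (with 8-byte words): an int or a double needs 1 word, while a
   12-byte BLKmode struct needs (12 + 7) / 8 = 2 words by the rounding
   above.  */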
4646 /* Return the number of registers that should be skipped so the current
4647 argument (described by TYPE and WORDS) will be properly aligned.
4649 Integer and float arguments larger than 8 bytes start at the next
4650 even boundary. Aggregates larger than 8 bytes start at the next
4651 even boundary if the aggregate has 16 byte alignment. Note that
4652 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4653 but are still to be aligned in registers.
4655 ??? The ABI does not specify how to handle aggregates with
4656 alignment from 9 to 15 bytes, or greater than 16. We handle them
4657 all as if they had 16 byte alignment. Such aggregates can occur
4658 only if gcc extensions are used. */
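/* Concretely (illustrative): if three slots are already in use
   (cum->words == 3, an odd slot) and the next argument is a
   16-byte-aligned aggregate, this returns 1 and the argument starts in
   slot 4; an 8-byte integer in the same position returns 0 and simply
   takes slot 3.  */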
4659 static int
4660 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4661 const_tree type, int words)
4663 /* No registers are skipped on VMS. */
4664 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4665 return 0;
4667 if (type
4668 && TREE_CODE (type) != INTEGER_TYPE
4669 && TREE_CODE (type) != REAL_TYPE)
4670 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4671 else
4672 return words > 1;
4675 /* Return rtx for register where argument is passed, or zero if it is passed
4676 on the stack. */
4677 /* ??? 128-bit quad-precision floats are always passed in general
4678 registers. */
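/* Rough examples of the results (illustrative; the exact registers
   depend on how many slots and FP argument registers are already in
   use):

     - a named, prototyped double in the first slot comes back as
       (reg:DF f8);
     - a named, prototyped struct of four floats (an SFmode HFA) comes
       back as a PARALLEL pairing f8..f11 with byte offsets 0, 4, 8, 12;
     - a long in the third slot is (reg:DI out2) on the caller side and
       (reg:DI in2) on the incoming side;
     - once all eight slots are used, 0 is returned and the argument
       goes on the stack.  */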
4680 static rtx
4681 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4682 const_tree type, bool named, bool incoming)
4684 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4686 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4687 int words = ia64_function_arg_words (type, mode);
4688 int offset = ia64_function_arg_offset (cum, type, words);
4689 machine_mode hfa_mode = VOIDmode;
4691 /* For OPEN VMS, emit the instruction setting up the argument register here,
4692 when we know it will be emitted together with the other argument setup
4693 insns. This is not the conceptually best place to do this, but it is
4694 the easiest as we have convenient access to cumulative args info. */
4696 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4697 && named == 1)
4699 unsigned HOST_WIDE_INT regval = cum->words;
4700 int i;
4702 for (i = 0; i < 8; i++)
4703 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4705 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4706 GEN_INT (regval));
4709 /* If all argument slots are used, then it must go on the stack. */
4710 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4711 return 0;
4713 /* On OpenVMS argument is either in Rn or Fn. */
4714 if (TARGET_ABI_OPEN_VMS)
4716 if (FLOAT_MODE_P (mode))
4717 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4718 else
4719 return gen_rtx_REG (mode, basereg + cum->words);
4722 /* Check for and handle homogeneous FP aggregates. */
4723 if (type)
4724 hfa_mode = hfa_element_mode (type, 0);
4726 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4727 and unprototyped hfas are passed specially. */
4728 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4730 rtx loc[16];
4731 int i = 0;
4732 int fp_regs = cum->fp_regs;
4733 int int_regs = cum->words + offset;
4734 int hfa_size = GET_MODE_SIZE (hfa_mode);
4735 int byte_size;
4736 int args_byte_size;
4738 /* If prototyped, pass it in FR regs then GR regs.
4739 If not prototyped, pass it in both FR and GR regs.
4741 If this is an SFmode aggregate, then it is possible to run out of
4742 FR regs while GR regs are still left. In that case, we pass the
4743 remaining part in the GR regs. */
4745 /* Fill the FP regs. We do this always. We stop if we reach the end
4746 of the argument, the last FP register, or the last argument slot. */
4748 byte_size = ((mode == BLKmode)
4749 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4750 args_byte_size = int_regs * UNITS_PER_WORD;
4751 offset = 0;
4752 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4753 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4755 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4756 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4757 + fp_regs)),
4758 GEN_INT (offset));
4759 offset += hfa_size;
4760 args_byte_size += hfa_size;
4761 fp_regs++;
4764 /* If no prototype, then the whole thing must go in GR regs. */
4765 if (! cum->prototype)
4766 offset = 0;
4767 /* If this is an SFmode aggregate, then we might have some left over
4768 that needs to go in GR regs. */
4769 else if (byte_size != offset)
4770 int_regs += offset / UNITS_PER_WORD;
4772 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4774 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4776 machine_mode gr_mode = DImode;
4777 unsigned int gr_size;
4779 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4780 then this goes in a GR reg left adjusted/little endian, right
4781 adjusted/big endian. */
4782 /* ??? Currently this is handled wrong, because 4-byte hunks are
4783 always right adjusted/little endian. */
4784 if (offset & 0x4)
4785 gr_mode = SImode;
4786 /* If we have an even 4 byte hunk because the aggregate is a
4787 multiple of 4 bytes in size, then this goes in a GR reg right
4788 adjusted/little endian. */
4789 else if (byte_size - offset == 4)
4790 gr_mode = SImode;
4792 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4793 gen_rtx_REG (gr_mode, (basereg
4794 + int_regs)),
4795 GEN_INT (offset));
4797 gr_size = GET_MODE_SIZE (gr_mode);
4798 offset += gr_size;
4799 if (gr_size == UNITS_PER_WORD
4800 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4801 int_regs++;
4802 else if (gr_size > UNITS_PER_WORD)
4803 int_regs += gr_size / UNITS_PER_WORD;
4805 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4808 /* Integral values and aggregates go in general registers. If we have run out
4809 of FR registers, then FP values must also go in general registers. This can
4810 happen when we have an SFmode HFA. */
4811 else if (mode == TFmode || mode == TCmode
4812 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4814 int byte_size = ((mode == BLKmode)
4815 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4816 if (BYTES_BIG_ENDIAN
4817 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4818 && byte_size < UNITS_PER_WORD
4819 && byte_size > 0)
4821 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4822 gen_rtx_REG (DImode,
4823 (basereg + cum->words
4824 + offset)),
4825 const0_rtx);
4826 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4828 else
4829 return gen_rtx_REG (mode, basereg + cum->words + offset);
4833 /* If there is a prototype, then FP values go in a FR register when
4834 named, and in a GR register when unnamed. */
4835 else if (cum->prototype)
4837 if (named)
4838 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4839 /* In big-endian mode, an anonymous SFmode value must be represented
4840 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4841 the value into the high half of the general register. */
4842 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4843 return gen_rtx_PARALLEL (mode,
4844 gen_rtvec (1,
4845 gen_rtx_EXPR_LIST (VOIDmode,
4846 gen_rtx_REG (DImode, basereg + cum->words + offset),
4847 const0_rtx)));
4848 else
4849 return gen_rtx_REG (mode, basereg + cum->words + offset);
4851 /* If there is no prototype, then FP values go in both FR and GR
4852 registers. */
4853 else
4855 /* See comment above. */
4856 machine_mode inner_mode =
4857 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4859 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4860 gen_rtx_REG (mode, (FR_ARG_FIRST
4861 + cum->fp_regs)),
4862 const0_rtx);
4863 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4864 gen_rtx_REG (inner_mode,
4865 (basereg + cum->words
4866 + offset)),
4867 const0_rtx);
4869 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4873 /* Implement TARGET_FUNCTION_ARG target hook. */
4875 static rtx
4876 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4877 const_tree type, bool named)
4879 return ia64_function_arg_1 (cum, mode, type, named, false);
4882 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4884 static rtx
4885 ia64_function_incoming_arg (cumulative_args_t cum,
4886 machine_mode mode,
4887 const_tree type, bool named)
4889 return ia64_function_arg_1 (cum, mode, type, named, true);
4892 /* Return number of bytes, at the beginning of the argument, that must be
4893 put in registers. 0 if the argument is entirely in registers or entirely
4894 in memory. */
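/* For example (illustrative): a 32-byte aggregate that starts when six
   of the eight slots are already used gets (8 - 6) * 8 = 16 bytes in
   the last two argument registers and the remaining 16 bytes on the
   stack; the same aggregate starting in slot 0 returns 0, i.e. it is
   passed entirely in registers.  */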
4896 static int
4897 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4898 tree type, bool named ATTRIBUTE_UNUSED)
4900 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4902 int words = ia64_function_arg_words (type, mode);
4903 int offset = ia64_function_arg_offset (cum, type, words);
4905 /* If all argument slots are used, then it must go on the stack. */
4906 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4907 return 0;
4909 /* It doesn't matter whether the argument goes in FR or GR regs. If
4910 it fits within the 8 argument slots, then it goes entirely in
4911 registers. If it extends past the last argument slot, then the rest
4912 goes on the stack. */
4914 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4915 return 0;
4917 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4920 /* Return ivms_arg_type based on machine_mode. */
4922 static enum ivms_arg_type
4923 ia64_arg_type (machine_mode mode)
4925 switch (mode)
4927 case SFmode:
4928 return FS;
4929 case DFmode:
4930 return FT;
4931 default:
4932 return I64;
4936 /* Update CUM to point after this argument. This is patterned after
4937 ia64_function_arg. */
4939 static void
4940 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4941 const_tree type, bool named)
4943 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4944 int words = ia64_function_arg_words (type, mode);
4945 int offset = ia64_function_arg_offset (cum, type, words);
4946 machine_mode hfa_mode = VOIDmode;
4948 /* If all arg slots are already full, then there is nothing to do. */
4949 if (cum->words >= MAX_ARGUMENT_SLOTS)
4951 cum->words += words + offset;
4952 return;
4955 cum->atypes[cum->words] = ia64_arg_type (mode);
4956 cum->words += words + offset;
4958 /* On OpenVMS argument is either in Rn or Fn. */
4959 if (TARGET_ABI_OPEN_VMS)
4961 cum->int_regs = cum->words;
4962 cum->fp_regs = cum->words;
4963 return;
4966 /* Check for and handle homogeneous FP aggregates. */
4967 if (type)
4968 hfa_mode = hfa_element_mode (type, 0);
4970 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4971 and unprototyped hfas are passed specially. */
4972 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4974 int fp_regs = cum->fp_regs;
4975 /* This is the original value of cum->words + offset. */
4976 int int_regs = cum->words - words;
4977 int hfa_size = GET_MODE_SIZE (hfa_mode);
4978 int byte_size;
4979 int args_byte_size;
4981 /* If prototyped, pass it in FR regs then GR regs.
4982 If not prototyped, pass it in both FR and GR regs.
4984 If this is an SFmode aggregate, then it is possible to run out of
4985 FR regs while GR regs are still left. In that case, we pass the
4986 remaining part in the GR regs. */
4988 /* Fill the FP regs. We do this always. We stop if we reach the end
4989 of the argument, the last FP register, or the last argument slot. */
4991 byte_size = ((mode == BLKmode)
4992 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4993 args_byte_size = int_regs * UNITS_PER_WORD;
4994 offset = 0;
4995 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4996 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4998 offset += hfa_size;
4999 args_byte_size += hfa_size;
5000 fp_regs++;
5003 cum->fp_regs = fp_regs;
5006 /* Integral values and aggregates go in general registers. So do TFmode FP
5007 values. If we have run out of FR registers, then other FP values must also
5008 go in general registers. This can happen when we have an SFmode HFA. */
5009 else if (mode == TFmode || mode == TCmode
5010 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5011 cum->int_regs = cum->words;
5013 /* If there is a prototype, then FP values go in a FR register when
5014 named, and in a GR register when unnamed. */
5015 else if (cum->prototype)
5017 if (! named)
5018 cum->int_regs = cum->words;
5019 else
5020 /* ??? Complex types should not reach here. */
5021 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5023 /* If there is no prototype, then FP values go in both FR and GR
5024 registers. */
5025 else
5027 /* ??? Complex types should not reach here. */
5028 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5029 cum->int_regs = cum->words;
5033 /* Arguments with alignment larger than 8 bytes start at the next even
5034 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5035 even though their normal alignment is 8 bytes. See ia64_function_arg. */
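/* Examples (assuming the usual 64-bit PARM_BOUNDARY): a plain int or
   double gets PARM_BOUNDARY; a 16-byte-aligned aggregate, a TImode
   integer, or (on ILP32 HP-UX) a TFmode long double gets
   PARM_BOUNDARY * 2, i.e. a 16-byte boundary.  */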
5037 static unsigned int
5038 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5040 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5041 return PARM_BOUNDARY * 2;
5043 if (type)
5045 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5046 return PARM_BOUNDARY * 2;
5047 else
5048 return PARM_BOUNDARY;
5051 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5052 return PARM_BOUNDARY * 2;
5053 else
5054 return PARM_BOUNDARY;
5057 /* True if it is OK to do sibling call optimization for the specified
5058 call expression EXP. DECL will be the called function, or NULL if
5059 this is an indirect call. */
5060 static bool
5061 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5063 /* We can't perform a sibcall if the current function has the syscall_linkage
5064 attribute. */
5065 if (lookup_attribute ("syscall_linkage",
5066 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5067 return false;
5069 /* We must always return with our current GP. This means we can
5070 only sibcall to functions defined in the current module unless
5071 TARGET_CONST_GP is set to true. */
5072 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5076 /* Implement va_arg. */
5078 static tree
5079 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5080 gimple_seq *post_p)
5082 /* Variable sized types are passed by reference. */
5083 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5085 tree ptrtype = build_pointer_type (type);
5086 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5087 return build_va_arg_indirect_ref (addr);
5090 /* Aggregate arguments with alignment larger than 8 bytes start at
5091 the next even boundary. Integer and floating point arguments
5092 do so if they are larger than 8 bytes, whether or not they are
5093 also aligned larger than 8 bytes. */
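/* The rounding below is the usual (p + 15) & -16 idiom (with 8-byte
   words): e.g. a va_list pointer of 0x1008 is bumped to 0x1010, while
   one already on a 16-byte boundary is left unchanged.  */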
5094 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5095 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5097 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5098 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5099 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5100 gimplify_assign (unshare_expr (valist), t, pre_p);
5103 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5106 /* Return 1 if the function return value is returned in memory. Return 0 if it is
5107 in a register. */
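/* Examples (illustrative): a struct of two doubles is a DFmode HFA and
   is returned in FP registers, not memory; a variable-sized object, or
   an HFA with more than MAX_ARGUMENT_SLOTS elements, is returned in
   memory; a small non-HFA aggregate that fits in the integer return
   slots also stays out of memory.  */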
5109 static bool
5110 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5112 machine_mode mode;
5113 machine_mode hfa_mode;
5114 HOST_WIDE_INT byte_size;
5116 mode = TYPE_MODE (valtype);
5117 byte_size = GET_MODE_SIZE (mode);
5118 if (mode == BLKmode)
5120 byte_size = int_size_in_bytes (valtype);
5121 if (byte_size < 0)
5122 return true;
5125 /* HFAs with up to 8 elements are returned in the FP argument registers. */
5127 hfa_mode = hfa_element_mode (valtype, 0);
5128 if (hfa_mode != VOIDmode)
5130 int hfa_size = GET_MODE_SIZE (hfa_mode);
5132 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5133 return true;
5134 else
5135 return false;
5137 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5138 return true;
5139 else
5140 return false;
5143 /* Return rtx for register that holds the function return value. */
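/* For instance (illustrative): a struct of three floats (an SFmode HFA)
   is returned as a PARALLEL of f8, f9, f10 at byte offsets 0, 4, 8; a
   plain double comes back in f8; a long, or an aggregate that must use
   the integer side, starts at r8 (GR_RET_FIRST).  */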
5145 static rtx
5146 ia64_function_value (const_tree valtype,
5147 const_tree fn_decl_or_type,
5148 bool outgoing ATTRIBUTE_UNUSED)
5150 machine_mode mode;
5151 machine_mode hfa_mode;
5152 int unsignedp;
5153 const_tree func = fn_decl_or_type;
5155 if (fn_decl_or_type
5156 && !DECL_P (fn_decl_or_type))
5157 func = NULL;
5159 mode = TYPE_MODE (valtype);
5160 hfa_mode = hfa_element_mode (valtype, 0);
5162 if (hfa_mode != VOIDmode)
5164 rtx loc[8];
5165 int i;
5166 int hfa_size;
5167 int byte_size;
5168 int offset;
5170 hfa_size = GET_MODE_SIZE (hfa_mode);
5171 byte_size = ((mode == BLKmode)
5172 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5173 offset = 0;
5174 for (i = 0; offset < byte_size; i++)
5176 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5177 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5178 GEN_INT (offset));
5179 offset += hfa_size;
5181 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5183 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5184 return gen_rtx_REG (mode, FR_ARG_FIRST);
5185 else
5187 bool need_parallel = false;
5189 /* In big-endian mode, we need to manage the layout of aggregates
5190 in the registers so that we get the bits properly aligned in
5191 the highpart of the registers. */
5192 if (BYTES_BIG_ENDIAN
5193 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5194 need_parallel = true;
5196 /* Something like struct S { long double x; char a[0] } is not an
5197 HFA structure, and therefore doesn't go in fp registers. But
5198 the middle-end will give it XFmode anyway, and XFmode values
5199 don't normally fit in integer registers. So we need to smuggle
5200 the value inside a parallel. */
5201 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5202 need_parallel = true;
5204 if (need_parallel)
5206 rtx loc[8];
5207 int offset;
5208 int bytesize;
5209 int i;
5211 offset = 0;
5212 bytesize = int_size_in_bytes (valtype);
5213 /* An empty PARALLEL is invalid here, but the return value
5214 doesn't matter for empty structs. */
5215 if (bytesize == 0)
5216 return gen_rtx_REG (mode, GR_RET_FIRST);
5217 for (i = 0; offset < bytesize; i++)
5219 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5220 gen_rtx_REG (DImode,
5221 GR_RET_FIRST + i),
5222 GEN_INT (offset));
5223 offset += UNITS_PER_WORD;
5225 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5228 mode = promote_function_mode (valtype, mode, &unsignedp,
5229 func ? TREE_TYPE (func) : NULL_TREE,
5230 true);
5232 return gen_rtx_REG (mode, GR_RET_FIRST);
5236 /* Worker function for TARGET_LIBCALL_VALUE. */
5238 static rtx
5239 ia64_libcall_value (machine_mode mode,
5240 const_rtx fun ATTRIBUTE_UNUSED)
5242 return gen_rtx_REG (mode,
5243 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5244 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5245 && (mode) != TFmode)
5246 ? FR_RET_FIRST : GR_RET_FIRST));
5249 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5251 static bool
5252 ia64_function_value_regno_p (const unsigned int regno)
5254 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5255 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5258 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5259 We need to emit DTP-relative relocations. */
5261 static void
5262 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5264 gcc_assert (size == 4 || size == 8);
5265 if (size == 4)
5266 fputs ("\tdata4.ua\t@dtprel(", file);
5267 else
5268 fputs ("\tdata8.ua\t@dtprel(", file);
5269 output_addr_const (file, x);
5270 fputs (")", file);
5273 /* Print a memory address as an operand to reference that memory location. */
5275 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5276 also call this from ia64_print_operand for memory addresses. */
5278 static void
5279 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5280 rtx address ATTRIBUTE_UNUSED)
5284 /* Print an operand to an assembler instruction.
5285 C Swap and print a comparison operator.
5286 D Print an FP comparison operator.
5287 E Print 32 - constant, for SImode shifts as extract.
5288 e Print 64 - constant, for DImode rotates.
5289 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5290 a floating point register emitted normally.
5291 G A floating point constant.
5292 I Invert a predicate register by adding 1.
5293 J Select the proper predicate register for a condition.
5294 j Select the inverse predicate register for a condition.
5295 O Append .acq for volatile load.
5296 P Postincrement of a MEM.
5297 Q Append .rel for volatile store.
5298 R Print .s .d or nothing for a single, double or no truncation.
5299 S Shift amount for shladd instruction.
5300 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5301 for Intel assembler.
5302 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5303 for Intel assembler.
5304 X A pair of floating point registers.
5305 r Print register name, or constant 0 as r0. HP compatibility for
5306 Linux kernel.
5307 v Print vector constant value as an 8-byte integer value. */
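/* A couple of examples of the codes above (illustrative): for a DImode
   post-increment memory operand, "%P0" prints ", 8", giving output such
   as "ld8 r14 = [r15], 8"; "%," prints the "(p6) " predicate prefix
   when the insn is predicated on p6; and "%r" prints a constant-zero
   operand as "r0".  */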
5309 static void
5310 ia64_print_operand (FILE * file, rtx x, int code)
5312 const char *str;
5314 switch (code)
5316 case 0:
5317 /* Handled below. */
5318 break;
5320 case 'C':
5322 enum rtx_code c = swap_condition (GET_CODE (x));
5323 fputs (GET_RTX_NAME (c), file);
5324 return;
5327 case 'D':
5328 switch (GET_CODE (x))
5330 case NE:
5331 str = "neq";
5332 break;
5333 case UNORDERED:
5334 str = "unord";
5335 break;
5336 case ORDERED:
5337 str = "ord";
5338 break;
5339 case UNLT:
5340 str = "nge";
5341 break;
5342 case UNLE:
5343 str = "ngt";
5344 break;
5345 case UNGT:
5346 str = "nle";
5347 break;
5348 case UNGE:
5349 str = "nlt";
5350 break;
5351 case UNEQ:
5352 case LTGT:
5353 gcc_unreachable ();
5354 default:
5355 str = GET_RTX_NAME (GET_CODE (x));
5356 break;
5358 fputs (str, file);
5359 return;
5361 case 'E':
5362 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5363 return;
5365 case 'e':
5366 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5367 return;
5369 case 'F':
5370 if (x == CONST0_RTX (GET_MODE (x)))
5371 str = reg_names [FR_REG (0)];
5372 else if (x == CONST1_RTX (GET_MODE (x)))
5373 str = reg_names [FR_REG (1)];
5374 else
5376 gcc_assert (GET_CODE (x) == REG);
5377 str = reg_names [REGNO (x)];
5379 fputs (str, file);
5380 return;
5382 case 'G':
5384 long val[4];
5385 REAL_VALUE_TYPE rv;
5386 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5387 real_to_target (val, &rv, GET_MODE (x));
5388 if (GET_MODE (x) == SFmode)
5389 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5390 else if (GET_MODE (x) == DFmode)
5391 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5392 & 0xffffffff,
5393 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5394 & 0xffffffff);
5395 else
5396 output_operand_lossage ("invalid %%G mode");
5398 return;
5400 case 'I':
5401 fputs (reg_names [REGNO (x) + 1], file);
5402 return;
5404 case 'J':
5405 case 'j':
5407 unsigned int regno = REGNO (XEXP (x, 0));
5408 if (GET_CODE (x) == EQ)
5409 regno += 1;
5410 if (code == 'j')
5411 regno ^= 1;
5412 fputs (reg_names [regno], file);
5414 return;
5416 case 'O':
5417 if (MEM_VOLATILE_P (x))
5418 fputs(".acq", file);
5419 return;
5421 case 'P':
5423 HOST_WIDE_INT value;
5425 switch (GET_CODE (XEXP (x, 0)))
5427 default:
5428 return;
5430 case POST_MODIFY:
5431 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5432 if (GET_CODE (x) == CONST_INT)
5433 value = INTVAL (x);
5434 else
5436 gcc_assert (GET_CODE (x) == REG);
5437 fprintf (file, ", %s", reg_names[REGNO (x)]);
5438 return;
5440 break;
5442 case POST_INC:
5443 value = GET_MODE_SIZE (GET_MODE (x));
5444 break;
5446 case POST_DEC:
5447 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5448 break;
5451 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5452 return;
5455 case 'Q':
5456 if (MEM_VOLATILE_P (x))
5457 fputs(".rel", file);
5458 return;
5460 case 'R':
5461 if (x == CONST0_RTX (GET_MODE (x)))
5462 fputs(".s", file);
5463 else if (x == CONST1_RTX (GET_MODE (x)))
5464 fputs(".d", file);
5465 else if (x == CONST2_RTX (GET_MODE (x)))
5467 else
5468 output_operand_lossage ("invalid %%R value");
5469 return;
5471 case 'S':
5472 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5473 return;
5475 case 'T':
5476 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5478 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5479 return;
5481 break;
5483 case 'U':
5484 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5486 const char *prefix = "0x";
5487 if (INTVAL (x) & 0x80000000)
5489 fprintf (file, "0xffffffff");
5490 prefix = "";
5492 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5493 return;
5495 break;
5497 case 'X':
5499 unsigned int regno = REGNO (x);
5500 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5502 return;
5504 case 'r':
5505 /* If this operand is the constant zero, write it as register zero.
5506 Any register, zero, or CONST_INT value is OK here. */
5507 if (GET_CODE (x) == REG)
5508 fputs (reg_names[REGNO (x)], file);
5509 else if (x == CONST0_RTX (GET_MODE (x)))
5510 fputs ("r0", file);
5511 else if (GET_CODE (x) == CONST_INT)
5512 output_addr_const (file, x);
5513 else
5514 output_operand_lossage ("invalid %%r value");
5515 return;
5517 case 'v':
5518 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5519 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5520 break;
5522 case '+':
5524 const char *which;
5526 /* For conditional branches, returns or calls, substitute
5527 sptk, dptk, dpnt, or spnt for %s. */
5528 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5529 if (x)
5531 int pred_val = XINT (x, 0);
5533 /* Guess top and bottom 2% statically predicted. */
5534 if (pred_val < REG_BR_PROB_BASE / 50
5535 && br_prob_note_reliable_p (x))
5536 which = ".spnt";
5537 else if (pred_val < REG_BR_PROB_BASE / 2)
5538 which = ".dpnt";
5539 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5540 || !br_prob_note_reliable_p (x))
5541 which = ".dptk";
5542 else
5543 which = ".sptk";
5545 else if (CALL_P (current_output_insn))
5546 which = ".sptk";
5547 else
5548 which = ".dptk";
5550 fputs (which, file);
5551 return;
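/* For example (illustrative): a conditional branch whose REG_BR_PROB note
   reliably predicts it taken less than 2% of the time gets the static
   not-taken hint and is emitted roughly as
	(p6) br.cond.spnt .L7
   whereas a branch predicted taken with only moderate probability gets
   the dynamic hint ".dptk".  */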
5554 case ',':
5555 x = current_insn_predicate;
5556 if (x)
5558 unsigned int regno = REGNO (XEXP (x, 0));
5559 if (GET_CODE (x) == EQ)
5560 regno += 1;
5561 fprintf (file, "(%s) ", reg_names [regno]);
5563 return;
5565 default:
5566 output_operand_lossage ("ia64_print_operand: unknown code");
5567 return;
5570 switch (GET_CODE (x))
5572 /* This happens for the spill/restore instructions. */
5573 case POST_INC:
5574 case POST_DEC:
5575 case POST_MODIFY:
5576 x = XEXP (x, 0);
5577 /* ... fall through ... */
5579 case REG:
5580 fputs (reg_names [REGNO (x)], file);
5581 break;
5583 case MEM:
5585 rtx addr = XEXP (x, 0);
5586 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5587 addr = XEXP (addr, 0);
5588 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5589 break;
5592 default:
5593 output_addr_const (file, x);
5594 break;
5597 return;
5600 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5602 static bool
5603 ia64_print_operand_punct_valid_p (unsigned char code)
5605 return (code == '+' || code == ',');
5608 /* Compute a (partial) cost for rtx X. Return true if the complete
5609 cost has been computed, and false if subexpressions should be
5610 scanned. In either case, *TOTAL contains the cost result. */
5611 /* ??? This is incomplete. */
5613 static bool
5614 ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5615 int *total, bool speed ATTRIBUTE_UNUSED)
5617 switch (code)
5619 case CONST_INT:
5620 switch (outer_code)
5622 case SET:
5623 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5624 return true;
5625 case PLUS:
5626 if (satisfies_constraint_I (x))
5627 *total = 0;
5628 else if (satisfies_constraint_J (x))
5629 *total = 1;
5630 else
5631 *total = COSTS_N_INSNS (1);
5632 return true;
5633 default:
5634 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5635 *total = 0;
5636 else
5637 *total = COSTS_N_INSNS (1);
5638 return true;
5641 case CONST_DOUBLE:
5642 *total = COSTS_N_INSNS (1);
5643 return true;
5645 case CONST:
5646 case SYMBOL_REF:
5647 case LABEL_REF:
5648 *total = COSTS_N_INSNS (3);
5649 return true;
5651 case FMA:
5652 *total = COSTS_N_INSNS (4);
5653 return true;
5655 case MULT:
5656 /* For multiplies wider than HImode, we have to go to the FPU,
5657 which normally involves copies. Plus there's the latency
5658 of the multiply itself, and the latency of the instructions to
5659 transfer integer regs to FP regs. */
5660 if (FLOAT_MODE_P (GET_MODE (x)))
5661 *total = COSTS_N_INSNS (4);
5662 else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5663 *total = COSTS_N_INSNS (10);
5664 else
5665 *total = COSTS_N_INSNS (2);
5666 return true;
5668 case PLUS:
5669 case MINUS:
5670 if (FLOAT_MODE_P (GET_MODE (x)))
5672 *total = COSTS_N_INSNS (4);
5673 return true;
5675 /* FALLTHRU */
5677 case ASHIFT:
5678 case ASHIFTRT:
5679 case LSHIFTRT:
5680 *total = COSTS_N_INSNS (1);
5681 return true;
5683 case DIV:
5684 case UDIV:
5685 case MOD:
5686 case UMOD:
5687 /* We make divide expensive, so that divide-by-constant will be
5688 optimized to a multiply. */
5689 *total = COSTS_N_INSNS (60);
5690 return true;
5692 default:
5693 return false;
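/* The large DIV/MOD cost above is what pushes division by a constant
   toward a multiply-by-reciprocal expansion.  A minimal standalone sketch
   of that classic transformation for 32-bit unsigned division by 10
   (illustrative only, not GCC's own expansion code):  */
#if 0
static unsigned int
udiv10 (unsigned int x)
{
  /* 0xCCCCCCCD == ceil (2**35 / 10); the 64-bit product cannot overflow.  */
  return (unsigned int) (((unsigned long long) x * 0xCCCCCCCDu) >> 35);
}
#endif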
5697 /* Calculate the cost of moving data from a register in class FROM to
5698 one in class TO, using MODE. */
5700 static int
5701 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5702 reg_class_t to)
5704 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5705 if (to == ADDL_REGS)
5706 to = GR_REGS;
5707 if (from == ADDL_REGS)
5708 from = GR_REGS;
5710 /* All costs are symmetric, so reduce cases by putting the
5711 lower-numbered class as the destination. */
5712 if (from < to)
5714 reg_class_t tmp = to;
5715 to = from, from = tmp;
5718 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5719 so that we get secondary memory reloads. Between FR_REGS,
5720 we have to make this at least as expensive as memory_move_cost
5721 to avoid spectacularly poor register class preferencing. */
5722 if (mode == XFmode || mode == RFmode)
5724 if (to != GR_REGS || from != GR_REGS)
5725 return memory_move_cost (mode, to, false);
5726 else
5727 return 3;
5730 switch (to)
5732 case PR_REGS:
5733 /* Moving between PR registers takes two insns. */
5734 if (from == PR_REGS)
5735 return 3;
5736 /* Moving between PR and anything but GR is impossible. */
5737 if (from != GR_REGS)
5738 return memory_move_cost (mode, to, false);
5739 break;
5741 case BR_REGS:
5742 /* Moving between BR and anything but GR is impossible. */
5743 if (from != GR_REGS && from != GR_AND_BR_REGS)
5744 return memory_move_cost (mode, to, false);
5745 break;
5747 case AR_I_REGS:
5748 case AR_M_REGS:
5749 /* Moving between AR and anything but GR is impossible. */
5750 if (from != GR_REGS)
5751 return memory_move_cost (mode, to, false);
5752 break;
5754 case GR_REGS:
5755 case FR_REGS:
5756 case FP_REGS:
5757 case GR_AND_FR_REGS:
5758 case GR_AND_BR_REGS:
5759 case ALL_REGS:
5760 break;
5762 default:
5763 gcc_unreachable ();
5766 return 2;
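/* For instance, under the rules above an XFmode copy that stays within the
   general registers costs 3, while XFmode/RFmode copies involving any other
   class (in practice the floating-point registers) are charged
   memory_move_cost, so FR<->GR copies in those modes get secondary memory
   reloads and FR<->FR copies are not preferred over memory.  */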
5769 /* Calculate the cost of moving data of MODE from a register to or from
5770 memory. */
5772 static int
5773 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5774 reg_class_t rclass,
5775 bool in ATTRIBUTE_UNUSED)
5777 if (rclass == GENERAL_REGS
5778 || rclass == FR_REGS
5779 || rclass == FP_REGS
5780 || rclass == GR_AND_FR_REGS)
5781 return 4;
5782 else
5783 return 10;
5786 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5787 on RCLASS to use when copying X into that class. */
5789 static reg_class_t
5790 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5792 switch (rclass)
5794 case FR_REGS:
5795 case FP_REGS:
5796 /* Don't allow volatile mem reloads into floating point registers.
5797 This is defined to force reload to choose the r/m case instead
5798 of the f/f case when reloading (set (reg fX) (mem/v)). */
5799 if (MEM_P (x) && MEM_VOLATILE_P (x))
5800 return NO_REGS;
5802 /* Force all unrecognized constants into the constant pool. */
5803 if (CONSTANT_P (x))
5804 return NO_REGS;
5805 break;
5807 case AR_M_REGS:
5808 case AR_I_REGS:
5809 if (!OBJECT_P (x))
5810 return NO_REGS;
5811 break;
5813 default:
5814 break;
5817 return rclass;
5820 /* This function returns the register class required for a secondary
5821 register when copying between one of the registers in RCLASS, and X,
5822 using MODE. A return value of NO_REGS means that no secondary register
5823 is required. */
5825 enum reg_class
5826 ia64_secondary_reload_class (enum reg_class rclass,
5827 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5829 int regno = -1;
5831 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5832 regno = true_regnum (x);
5834 switch (rclass)
5836 case BR_REGS:
5837 case AR_M_REGS:
5838 case AR_I_REGS:
5839 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5840 interaction. We end up with two pseudos with overlapping lifetimes
5841 both of which are equiv to the same constant, and both of which need
5842 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5843 changes depending on the path length, which means the qty_first_reg
5844 check in make_regs_eqv can give different answers at different times.
5845 At some point I'll probably need a reload_indi pattern to handle
5846 this.
5848 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5849 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5850 non-general registers for good measure. */
5851 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5852 return GR_REGS;
5854 /* This is needed if a pseudo used as a call_operand gets spilled to a
5855 stack slot. */
5856 if (GET_CODE (x) == MEM)
5857 return GR_REGS;
5858 break;
5860 case FR_REGS:
5861 case FP_REGS:
5862 /* Need to go through general registers to get to other class regs. */
5863 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5864 return GR_REGS;
5866 /* This can happen when a paradoxical subreg is an operand to the
5867 muldi3 pattern. */
5868 /* ??? This shouldn't be necessary after instruction scheduling is
5869 enabled, because paradoxical subregs are not accepted by
5870 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5871 stop the paradoxical subreg stupidity in the *_operand functions
5872 in recog.c. */
5873 if (GET_CODE (x) == MEM
5874 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5875 || GET_MODE (x) == QImode))
5876 return GR_REGS;
5878 /* This can happen because of the ior/and/etc patterns that accept FP
5879 registers as operands. If the third operand is a constant, then it
5880 needs to be reloaded into a FP register. */
5881 if (GET_CODE (x) == CONST_INT)
5882 return GR_REGS;
5884 /* This can happen because of register elimination in a muldi3 insn.
5885 E.g. `26107 * (unsigned long)&u'. */
5886 if (GET_CODE (x) == PLUS)
5887 return GR_REGS;
5888 break;
5890 case PR_REGS:
5891 /* ??? This happens if we cse/gcse a BImode value across a call,
5892 and the function has a nonlocal goto. This is because global
5893 does not allocate call crossing pseudos to hard registers when
5894 crtl->has_nonlocal_goto is true. This is relatively
5895 common for C++ programs that use exceptions. To reproduce,
5896 return NO_REGS and compile libstdc++. */
5897 if (GET_CODE (x) == MEM)
5898 return GR_REGS;
5900 /* This can happen when we take a BImode subreg of a DImode value,
5901 and that DImode value winds up in some non-GR register. */
5902 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5903 return GR_REGS;
5904 break;
5906 default:
5907 break;
5910 return NO_REGS;
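/* As a concrete illustration: a CONST_INT cannot be loaded into an FR
   register directly, so the CONST_INT case above requests a GR_REGS
   secondary and the constant is materialized roughly as
	mov r14 = 42
	setf.sig f6 = r14
   (the register numbers here are only for illustration).  */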
5914 /* Implement targetm.unspec_may_trap_p hook. */
5915 static int
5916 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5918 switch (XINT (x, 1))
5920 case UNSPEC_LDA:
5921 case UNSPEC_LDS:
5922 case UNSPEC_LDSA:
5923 case UNSPEC_LDCCLR:
5924 case UNSPEC_CHKACLR:
5925 case UNSPEC_CHKS:
5926 /* These unspecs are just wrappers. */
5927 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5930 return default_unspec_may_trap_p (x, flags);
5934 /* Parse the -mfixed-range= option string. */
5936 static void
5937 fix_range (const char *const_str)
5939 int i, first, last;
5940 char *str, *dash, *comma;
5942 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5943 REG2 are either register names or register numbers. The effect
5944 of this option is to mark the registers in the range from REG1 to
5945 REG2 as ``fixed'' so they won't be used by the compiler. This is
5946 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5948 i = strlen (const_str);
5949 str = (char *) alloca (i + 1);
5950 memcpy (str, const_str, i + 1);
5952 while (1)
5954 dash = strchr (str, '-');
5955 if (!dash)
5957 warning (0, "value of -mfixed-range must have form REG1-REG2");
5958 return;
5960 *dash = '\0';
5962 comma = strchr (dash + 1, ',');
5963 if (comma)
5964 *comma = '\0';
5966 first = decode_reg_name (str);
5967 if (first < 0)
5969 warning (0, "unknown register name: %s", str);
5970 return;
5973 last = decode_reg_name (dash + 1);
5974 if (last < 0)
5976 warning (0, "unknown register name: %s", dash + 1);
5977 return;
5980 *dash = '-';
5982 if (first > last)
5984 warning (0, "%s-%s is an empty range", str, dash + 1);
5985 return;
5988 for (i = first; i <= last; ++i)
5989 fixed_regs[i] = call_used_regs[i] = 1;
5991 if (!comma)
5992 break;
5994 *comma = ',';
5995 str = comma + 1;
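/* Example usage (illustrative): -mfixed-range=f32-f127 marks f32 through
   f127 as fixed, and several ranges may be chained with commas, e.g.
   -mfixed-range=f32-f127,f12-f15 (the second range is hypothetical).  */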
5999 /* Implement TARGET_OPTION_OVERRIDE. */
6001 static void
6002 ia64_option_override (void)
6004 unsigned int i;
6005 cl_deferred_option *opt;
6006 vec<cl_deferred_option> *v
6007 = (vec<cl_deferred_option> *) ia64_deferred_options;
6009 if (v)
6010 FOR_EACH_VEC_ELT (*v, i, opt)
6012 switch (opt->opt_index)
6014 case OPT_mfixed_range_:
6015 fix_range (opt->arg);
6016 break;
6018 default:
6019 gcc_unreachable ();
6023 if (TARGET_AUTO_PIC)
6024 target_flags |= MASK_CONST_GP;
6026 /* Numerous experiments show that IRA-based loop pressure
6027 calculation works better for RTL loop invariant motion on targets
6028 with enough (>= 32) registers. It is an expensive optimization,
6029 so it is enabled only when optimizing for peak performance. */
6030 if (optimize >= 3)
6031 flag_ira_loop_pressure = 1;
6034 ia64_section_threshold = (global_options_set.x_g_switch_value
6035 ? g_switch_value
6036 : IA64_DEFAULT_GVALUE);
6038 init_machine_status = ia64_init_machine_status;
6040 if (align_functions <= 0)
6041 align_functions = 64;
6042 if (align_loops <= 0)
6043 align_loops = 32;
6044 if (TARGET_ABI_OPEN_VMS)
6045 flag_no_common = 1;
6047 ia64_override_options_after_change();
6050 /* Implement targetm.override_options_after_change. */
6052 static void
6053 ia64_override_options_after_change (void)
6055 if (optimize >= 3
6056 && !global_options_set.x_flag_selective_scheduling
6057 && !global_options_set.x_flag_selective_scheduling2)
6059 flag_selective_scheduling2 = 1;
6060 flag_sel_sched_pipelining = 1;
6062 if (mflag_sched_control_spec == 2)
6064 /* Control speculation is on by default for the selective scheduler,
6065 but not for the Haifa scheduler. */
6066 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6068 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6070 /* FIXME: remove this once breaking auto-inc insns apart is
6071 implemented as a transformation. */
6072 flag_auto_inc_dec = 0;
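/* In effect (an illustrative summary of the code above): compiling at -O3
   behaves as if -fselective-scheduling2 and -fsel-sched-pipelining had
   been given, unless either selective-scheduling flag was set explicitly,
   and control speculation (mflag_sched_control_spec) then defaults to on
   exactly when the selective scheduler is enabled.  */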
6076 /* Initialize the record of emitted frame related registers. */
6078 void ia64_init_expanders (void)
6080 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6083 static struct machine_function *
6084 ia64_init_machine_status (void)
6086 return ggc_cleared_alloc<machine_function> ();
6089 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6090 static enum attr_type ia64_safe_type (rtx_insn *);
6092 static enum attr_itanium_class
6093 ia64_safe_itanium_class (rtx_insn *insn)
6095 if (recog_memoized (insn) >= 0)
6096 return get_attr_itanium_class (insn);
6097 else if (DEBUG_INSN_P (insn))
6098 return ITANIUM_CLASS_IGNORE;
6099 else
6100 return ITANIUM_CLASS_UNKNOWN;
6103 static enum attr_type
6104 ia64_safe_type (rtx_insn *insn)
6106 if (recog_memoized (insn) >= 0)
6107 return get_attr_type (insn);
6108 else
6109 return TYPE_UNKNOWN;
6112 /* The following collection of routines emit instruction group stop bits as
6113 necessary to avoid dependencies. */
6115 /* Need to track some additional registers as far as serialization is
6116 concerned so we can properly handle br.call and br.ret. We could
6117 make these registers visible to gcc, but since these registers are
6118 never explicitly used in gcc generated code, it seems wasteful to
6119 do so (plus it would make the call and return patterns needlessly
6120 complex). */
6121 #define REG_RP (BR_REG (0))
6122 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6123 /* This is used for volatile asms which may require a stop bit immediately
6124 before and after them. */
6125 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6126 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6127 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6129 /* For each register, we keep track of how it has been written in the
6130 current instruction group.
6132 If a register is written unconditionally (no qualifying predicate),
6133 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6135 If a register is written if its qualifying predicate P is true, we
6136 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6137 may be written again by the complement of P (P^1) and when this happens,
6138 WRITE_COUNT gets set to 2.
6140 The result of this is that whenever an insn attempts to write a register
6141 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6143 If a predicate register is written by a floating-point insn, we set
6144 WRITTEN_BY_FP to true.
6146 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6147 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
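/* An illustrative trace of the scheme described above, within one
   instruction group:
	mov r14 = r15	unpredicated write: write_count (r14) becomes 2
	mov r14 = r16	write_count is already 2, so a stop bit (;;)
			must be emitted before this insn
   A predicated write instead moves write_count from 0 to 1 and records
   its qualifying predicate in first_pred.  */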
6149 #if GCC_VERSION >= 4000
6150 #define RWS_FIELD_TYPE __extension__ unsigned short
6151 #else
6152 #define RWS_FIELD_TYPE unsigned int
6153 #endif
6154 struct reg_write_state
6156 RWS_FIELD_TYPE write_count : 2;
6157 RWS_FIELD_TYPE first_pred : 10;
6158 RWS_FIELD_TYPE written_by_fp : 1;
6159 RWS_FIELD_TYPE written_by_and : 1;
6160 RWS_FIELD_TYPE written_by_or : 1;
6163 /* Cumulative info for the current instruction group. */
6164 struct reg_write_state rws_sum[NUM_REGS];
6165 #ifdef ENABLE_CHECKING
6166 /* Bitmap whether a register has been written in the current insn. */
6167 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6168 / HOST_BITS_PER_WIDEST_FAST_INT];
6170 static inline void
6171 rws_insn_set (int regno)
6173 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6174 SET_HARD_REG_BIT (rws_insn, regno);
6177 static inline int
6178 rws_insn_test (int regno)
6180 return TEST_HARD_REG_BIT (rws_insn, regno);
6182 #else
6183 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6184 unsigned char rws_insn[2];
6186 static inline void
6187 rws_insn_set (int regno)
6189 if (regno == REG_AR_CFM)
6190 rws_insn[0] = 1;
6191 else if (regno == REG_VOLATILE)
6192 rws_insn[1] = 1;
6195 static inline int
6196 rws_insn_test (int regno)
6198 if (regno == REG_AR_CFM)
6199 return rws_insn[0];
6200 if (regno == REG_VOLATILE)
6201 return rws_insn[1];
6202 return 0;
6204 #endif
6206 /* Indicates whether this is the first instruction after a stop bit,
6207 in which case we don't need another stop bit. Without this,
6208 ia64_variable_issue will die when scheduling an alloc. */
6209 static int first_instruction;
6211 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6212 RTL for one instruction. */
6213 struct reg_flags
6215 unsigned int is_write : 1; /* Is register being written? */
6216 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6217 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6218 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6219 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6220 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6223 static void rws_update (int, struct reg_flags, int);
6224 static int rws_access_regno (int, struct reg_flags, int);
6225 static int rws_access_reg (rtx, struct reg_flags, int);
6226 static void update_set_flags (rtx, struct reg_flags *);
6227 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6228 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6229 static void init_insn_group_barriers (void);
6230 static int group_barrier_needed (rtx_insn *);
6231 static int safe_group_barrier_needed (rtx_insn *);
6232 static int in_safe_group_barrier;
6234 /* Update *RWS for REGNO, which is being written by the current instruction,
6235 with predicate PRED, and associated register flags in FLAGS. */
6237 static void
6238 rws_update (int regno, struct reg_flags flags, int pred)
6240 if (pred)
6241 rws_sum[regno].write_count++;
6242 else
6243 rws_sum[regno].write_count = 2;
6244 rws_sum[regno].written_by_fp |= flags.is_fp;
6245 /* ??? Not tracking and/or across differing predicates. */
6246 rws_sum[regno].written_by_and = flags.is_and;
6247 rws_sum[regno].written_by_or = flags.is_or;
6248 rws_sum[regno].first_pred = pred;
6251 /* Handle an access to register REGNO of type FLAGS using predicate register
6252 PRED. Update rws_sum array. Return 1 if this access creates
6253 a dependency with an earlier instruction in the same group. */
6255 static int
6256 rws_access_regno (int regno, struct reg_flags flags, int pred)
6258 int need_barrier = 0;
6260 gcc_assert (regno < NUM_REGS);
6262 if (! PR_REGNO_P (regno))
6263 flags.is_and = flags.is_or = 0;
6265 if (flags.is_write)
6267 int write_count;
6269 rws_insn_set (regno);
6270 write_count = rws_sum[regno].write_count;
6272 switch (write_count)
6274 case 0:
6275 /* The register has not been written yet. */
6276 if (!in_safe_group_barrier)
6277 rws_update (regno, flags, pred);
6278 break;
6280 case 1:
6281 /* The register has been written via a predicate. Treat
6282 it like an unconditional write and do not try to check for a
6283 complementary pred reg in the earlier write. */
6284 if (flags.is_and && rws_sum[regno].written_by_and)
6285 ;
6286 else if (flags.is_or && rws_sum[regno].written_by_or)
6287 ;
6288 else
6289 need_barrier = 1;
6290 if (!in_safe_group_barrier)
6291 rws_update (regno, flags, pred);
6292 break;
6294 case 2:
6295 /* The register has been unconditionally written already. We
6296 need a barrier. */
6297 if (flags.is_and && rws_sum[regno].written_by_and)
6298 ;
6299 else if (flags.is_or && rws_sum[regno].written_by_or)
6300 ;
6301 else
6302 need_barrier = 1;
6303 if (!in_safe_group_barrier)
6305 rws_sum[regno].written_by_and = flags.is_and;
6306 rws_sum[regno].written_by_or = flags.is_or;
6308 break;
6310 default:
6311 gcc_unreachable ();
6314 else
6316 if (flags.is_branch)
6318 /* Branches have several RAW exceptions that allow us to avoid
6319 barriers. */
6321 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6322 /* RAW dependencies on branch regs are permissible as long
6323 as the writer is a non-branch instruction. Since we
6324 never generate code that uses a branch register written
6325 by a branch instruction, handling this case is
6326 easy. */
6327 return 0;
6329 if (REGNO_REG_CLASS (regno) == PR_REGS
6330 && ! rws_sum[regno].written_by_fp)
6331 /* The predicates of a branch are available within the
6332 same insn group as long as the predicate was written by
6333 something other than a floating-point instruction. */
6334 return 0;
6337 if (flags.is_and && rws_sum[regno].written_by_and)
6338 return 0;
6339 if (flags.is_or && rws_sum[regno].written_by_or)
6340 return 0;
6342 switch (rws_sum[regno].write_count)
6344 case 0:
6345 /* The register has not been written yet. */
6346 break;
6348 case 1:
6349 /* The register has been written via a predicate; assume we
6350 need a barrier (don't check for complementary regs). */
6351 need_barrier = 1;
6352 break;
6354 case 2:
6355 /* The register has been unconditionally written already. We
6356 need a barrier. */
6357 need_barrier = 1;
6358 break;
6360 default:
6361 gcc_unreachable ();
6365 return need_barrier;
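/* For example (illustrative), the branch exceptions above allow
	cmp.eq p6, p7 = r14, r15
	(p6) br.cond.dptk .L3
   within a single instruction group: the branch may read the predicate in
   the same group in which the integer compare wrote it, so no stop bit is
   required.  */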
6368 static int
6369 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6371 int regno = REGNO (reg);
6372 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6374 if (n == 1)
6375 return rws_access_regno (regno, flags, pred);
6376 else
6378 int need_barrier = 0;
6379 while (--n >= 0)
6380 need_barrier |= rws_access_regno (regno + n, flags, pred);
6381 return need_barrier;
6385 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
6386 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6388 static void
6389 update_set_flags (rtx x, struct reg_flags *pflags)
6391 rtx src = SET_SRC (x);
6393 switch (GET_CODE (src))
6395 case CALL:
6396 return;
6398 case IF_THEN_ELSE:
6399 /* There are four cases here:
6400 (1) The destination is (pc), in which case this is a branch,
6401 nothing here applies.
6402 (2) The destination is ar.lc, in which case this is a
6403 doloop_end_internal,
6404 (3) The destination is an fp register, in which case this is
6405 an fselect instruction.
6406 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6407 this is a check load.
6408 In all cases, nothing we do in this function applies. */
6409 return;
6411 default:
6412 if (COMPARISON_P (src)
6413 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6414 /* Set pflags->is_fp to 1 so that we know we're dealing
6415 with a floating point comparison when processing the
6416 destination of the SET. */
6417 pflags->is_fp = 1;
6419 /* Discover if this is a parallel comparison. We only handle
6420 and.orcm and or.andcm at present, since we must retain a
6421 strict inverse on the predicate pair. */
6422 else if (GET_CODE (src) == AND)
6423 pflags->is_and = 1;
6424 else if (GET_CODE (src) == IOR)
6425 pflags->is_or = 1;
6427 break;
6431 /* Subroutine of rtx_needs_barrier; this function determines whether the
6432 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6433 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6434 for this insn. */
6436 static int
6437 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6439 int need_barrier = 0;
6440 rtx dst;
6441 rtx src = SET_SRC (x);
6443 if (GET_CODE (src) == CALL)
6444 /* We don't need to worry about the result registers that
6445 get written by subroutine call. */
6446 return rtx_needs_barrier (src, flags, pred);
6447 else if (SET_DEST (x) == pc_rtx)
6449 /* X is a conditional branch. */
6450 /* ??? This seems redundant, as the caller sets this bit for
6451 all JUMP_INSNs. */
6452 if (!ia64_spec_check_src_p (src))
6453 flags.is_branch = 1;
6454 return rtx_needs_barrier (src, flags, pred);
6457 if (ia64_spec_check_src_p (src))
6458 /* Avoid checking one register twice (in condition
6459 and in 'then' section) for ldc pattern. */
6461 gcc_assert (REG_P (XEXP (src, 2)));
6462 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6464 /* We process MEM below. */
6465 src = XEXP (src, 1);
6468 need_barrier |= rtx_needs_barrier (src, flags, pred);
6470 dst = SET_DEST (x);
6471 if (GET_CODE (dst) == ZERO_EXTRACT)
6473 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6474 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6476 return need_barrier;
6479 /* Handle an access to rtx X of type FLAGS using predicate register
6480 PRED. Return 1 if this access creates a dependency with an earlier
6481 instruction in the same group. */
6483 static int
6484 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6486 int i, j;
6487 int is_complemented = 0;
6488 int need_barrier = 0;
6489 const char *format_ptr;
6490 struct reg_flags new_flags;
6491 rtx cond;
6493 if (! x)
6494 return 0;
6496 new_flags = flags;
6498 switch (GET_CODE (x))
6500 case SET:
6501 update_set_flags (x, &new_flags);
6502 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6503 if (GET_CODE (SET_SRC (x)) != CALL)
6505 new_flags.is_write = 1;
6506 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6508 break;
6510 case CALL:
6511 new_flags.is_write = 0;
6512 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6514 /* Avoid multiple register writes, in case this is a pattern with
6515 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6516 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6518 new_flags.is_write = 1;
6519 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6520 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6521 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6523 break;
6525 case COND_EXEC:
6526 /* X is a predicated instruction. */
6528 cond = COND_EXEC_TEST (x);
6529 gcc_assert (!pred);
6530 need_barrier = rtx_needs_barrier (cond, flags, 0);
6532 if (GET_CODE (cond) == EQ)
6533 is_complemented = 1;
6534 cond = XEXP (cond, 0);
6535 gcc_assert (GET_CODE (cond) == REG
6536 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6537 pred = REGNO (cond);
6538 if (is_complemented)
6539 ++pred;
6541 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6542 return need_barrier;
6544 case CLOBBER:
6545 case USE:
6546 /* Clobber & use are for earlier compiler-phases only. */
6547 break;
6549 case ASM_OPERANDS:
6550 case ASM_INPUT:
6551 /* We always emit stop bits for traditional asms. We emit stop bits
6552 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6553 if (GET_CODE (x) != ASM_OPERANDS
6554 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6556 /* Avoid writing the register multiple times if we have multiple
6557 asm outputs. This avoids a failure in rws_access_reg. */
6558 if (! rws_insn_test (REG_VOLATILE))
6560 new_flags.is_write = 1;
6561 rws_access_regno (REG_VOLATILE, new_flags, pred);
6563 return 1;
6566 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6567 We cannot just fall through here since then we would be confused
6568 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6569 traditional asms unlike their normal usage. */
6571 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6572 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6573 need_barrier = 1;
6574 break;
6576 case PARALLEL:
6577 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6579 rtx pat = XVECEXP (x, 0, i);
6580 switch (GET_CODE (pat))
6582 case SET:
6583 update_set_flags (pat, &new_flags);
6584 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6585 break;
6587 case USE:
6588 case CALL:
6589 case ASM_OPERANDS:
6590 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6591 break;
6593 case CLOBBER:
6594 if (REG_P (XEXP (pat, 0))
6595 && extract_asm_operands (x) != NULL_RTX
6596 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6598 new_flags.is_write = 1;
6599 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6600 new_flags, pred);
6601 new_flags = flags;
6603 break;
6605 case RETURN:
6606 break;
6608 default:
6609 gcc_unreachable ();
6612 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6614 rtx pat = XVECEXP (x, 0, i);
6615 if (GET_CODE (pat) == SET)
6617 if (GET_CODE (SET_SRC (pat)) != CALL)
6619 new_flags.is_write = 1;
6620 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6621 pred);
6624 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6625 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6627 break;
6629 case SUBREG:
6630 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6631 break;
6632 case REG:
6633 if (REGNO (x) == AR_UNAT_REGNUM)
6635 for (i = 0; i < 64; ++i)
6636 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6638 else
6639 need_barrier = rws_access_reg (x, flags, pred);
6640 break;
6642 case MEM:
6643 /* Find the regs used in memory address computation. */
6644 new_flags.is_write = 0;
6645 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6646 break;
6648 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6649 case SYMBOL_REF: case LABEL_REF: case CONST:
6650 break;
6652 /* Operators with side-effects. */
6653 case POST_INC: case POST_DEC:
6654 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6656 new_flags.is_write = 0;
6657 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6658 new_flags.is_write = 1;
6659 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6660 break;
6662 case POST_MODIFY:
6663 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6665 new_flags.is_write = 0;
6666 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6667 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6668 new_flags.is_write = 1;
6669 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6670 break;
6672 /* Handle common unary and binary ops for efficiency. */
6673 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6674 case MOD: case UDIV: case UMOD: case AND: case IOR:
6675 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6676 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6677 case NE: case EQ: case GE: case GT: case LE:
6678 case LT: case GEU: case GTU: case LEU: case LTU:
6679 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6680 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6681 break;
6683 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6684 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6685 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6686 case SQRT: case FFS: case POPCOUNT:
6687 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6688 break;
6690 case VEC_SELECT:
6691 /* VEC_SELECT's second argument is a PARALLEL with integers that
6692 describe the elements selected. On ia64, those integers are
6693 always constants. Avoid walking the PARALLEL so that we don't
6694 get confused with "normal" parallels and then die. */
6695 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6696 break;
6698 case UNSPEC:
6699 switch (XINT (x, 1))
6701 case UNSPEC_LTOFF_DTPMOD:
6702 case UNSPEC_LTOFF_DTPREL:
6703 case UNSPEC_DTPREL:
6704 case UNSPEC_LTOFF_TPREL:
6705 case UNSPEC_TPREL:
6706 case UNSPEC_PRED_REL_MUTEX:
6707 case UNSPEC_PIC_CALL:
6708 case UNSPEC_MF:
6709 case UNSPEC_FETCHADD_ACQ:
6710 case UNSPEC_FETCHADD_REL:
6711 case UNSPEC_BSP_VALUE:
6712 case UNSPEC_FLUSHRS:
6713 case UNSPEC_BUNDLE_SELECTOR:
6714 break;
6716 case UNSPEC_GR_SPILL:
6717 case UNSPEC_GR_RESTORE:
6719 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6720 HOST_WIDE_INT bit = (offset >> 3) & 63;
6722 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6723 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6724 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6725 new_flags, pred);
6726 break;
6729 case UNSPEC_FR_SPILL:
6730 case UNSPEC_FR_RESTORE:
6731 case UNSPEC_GETF_EXP:
6732 case UNSPEC_SETF_EXP:
6733 case UNSPEC_ADDP4:
6734 case UNSPEC_FR_SQRT_RECIP_APPROX:
6735 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6736 case UNSPEC_LDA:
6737 case UNSPEC_LDS:
6738 case UNSPEC_LDS_A:
6739 case UNSPEC_LDSA:
6740 case UNSPEC_CHKACLR:
6741 case UNSPEC_CHKS:
6742 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6743 break;
6745 case UNSPEC_FR_RECIP_APPROX:
6746 case UNSPEC_SHRP:
6747 case UNSPEC_COPYSIGN:
6748 case UNSPEC_FR_RECIP_APPROX_RES:
6749 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6750 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6751 break;
6753 case UNSPEC_CMPXCHG_ACQ:
6754 case UNSPEC_CMPXCHG_REL:
6755 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6756 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6757 break;
6759 default:
6760 gcc_unreachable ();
6762 break;
6764 case UNSPEC_VOLATILE:
6765 switch (XINT (x, 1))
6767 case UNSPECV_ALLOC:
6768 /* Alloc must always be the first instruction of a group.
6769 We force this by always returning true. */
6770 /* ??? We might get better scheduling if we explicitly check for
6771 input/local/output register dependencies, and modify the
6772 scheduler so that alloc is always reordered to the start of
6773 the current group. We could then eliminate all of the
6774 first_instruction code. */
6775 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6777 new_flags.is_write = 1;
6778 rws_access_regno (REG_AR_CFM, new_flags, pred);
6779 return 1;
6781 case UNSPECV_SET_BSP:
6782 case UNSPECV_PROBE_STACK_RANGE:
6783 need_barrier = 1;
6784 break;
6786 case UNSPECV_BLOCKAGE:
6787 case UNSPECV_INSN_GROUP_BARRIER:
6788 case UNSPECV_BREAK:
6789 case UNSPECV_PSAC_ALL:
6790 case UNSPECV_PSAC_NORMAL:
6791 return 0;
6793 case UNSPECV_PROBE_STACK_ADDRESS:
6794 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6795 break;
6797 default:
6798 gcc_unreachable ();
6800 break;
6802 case RETURN:
6803 new_flags.is_write = 0;
6804 need_barrier = rws_access_regno (REG_RP, flags, pred);
6805 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6807 new_flags.is_write = 1;
6808 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6809 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6810 break;
6812 default:
6813 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6814 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6815 switch (format_ptr[i])
6817 case '0': /* unused field */
6818 case 'i': /* integer */
6819 case 'n': /* note */
6820 case 'w': /* wide integer */
6821 case 's': /* pointer to string */
6822 case 'S': /* optional pointer to string */
6823 break;
6825 case 'e':
6826 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6827 need_barrier = 1;
6828 break;
6830 case 'E':
6831 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6832 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6833 need_barrier = 1;
6834 break;
6836 default:
6837 gcc_unreachable ();
6839 break;
6841 return need_barrier;
6844 /* Clear out the state for group_barrier_needed at the start of a
6845 sequence of insns. */
6847 static void
6848 init_insn_group_barriers (void)
6850 memset (rws_sum, 0, sizeof (rws_sum));
6851 first_instruction = 1;
6854 /* Given the current state, determine whether a group barrier (a stop bit) is
6855 necessary before INSN. Return nonzero if so. This modifies the state to
6856 include the effects of INSN as a side-effect. */
6858 static int
6859 group_barrier_needed (rtx_insn *insn)
6861 rtx pat;
6862 int need_barrier = 0;
6863 struct reg_flags flags;
6865 memset (&flags, 0, sizeof (flags));
6866 switch (GET_CODE (insn))
6868 case NOTE:
6869 case DEBUG_INSN:
6870 break;
6872 case BARRIER:
6873 /* A barrier doesn't imply an instruction group boundary. */
6874 break;
6876 case CODE_LABEL:
6877 memset (rws_insn, 0, sizeof (rws_insn));
6878 return 1;
6880 case CALL_INSN:
6881 flags.is_branch = 1;
6882 flags.is_sibcall = SIBLING_CALL_P (insn);
6883 memset (rws_insn, 0, sizeof (rws_insn));
6885 /* Don't bundle a call following another call. */
6886 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6888 need_barrier = 1;
6889 break;
6892 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6893 break;
6895 case JUMP_INSN:
6896 if (!ia64_spec_check_p (insn))
6897 flags.is_branch = 1;
6899 /* Don't bundle a jump following a call. */
6900 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6902 need_barrier = 1;
6903 break;
6905 /* FALLTHRU */
6907 case INSN:
6908 if (GET_CODE (PATTERN (insn)) == USE
6909 || GET_CODE (PATTERN (insn)) == CLOBBER)
6910 /* Don't care about USE and CLOBBER "insns"---those are used to
6911 indicate to the optimizer that it shouldn't get rid of
6912 certain operations. */
6913 break;
6915 pat = PATTERN (insn);
6917 /* Ug. Hack hacks hacked elsewhere. */
6918 switch (recog_memoized (insn))
6920 /* We play dependency tricks with the epilogue in order
6921 to get proper schedules. Undo this for dv analysis. */
6922 case CODE_FOR_epilogue_deallocate_stack:
6923 case CODE_FOR_prologue_allocate_stack:
6924 pat = XVECEXP (pat, 0, 0);
6925 break;
6927 /* The pattern we use for br.cloop confuses the code above.
6928 The second element of the vector is representative. */
6929 case CODE_FOR_doloop_end_internal:
6930 pat = XVECEXP (pat, 0, 1);
6931 break;
6933 /* Doesn't generate code. */
6934 case CODE_FOR_pred_rel_mutex:
6935 case CODE_FOR_prologue_use:
6936 return 0;
6938 default:
6939 break;
6942 memset (rws_insn, 0, sizeof (rws_insn));
6943 need_barrier = rtx_needs_barrier (pat, flags, 0);
6945 /* Check to see if the previous instruction was a volatile
6946 asm. */
6947 if (! need_barrier)
6948 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6950 break;
6952 default:
6953 gcc_unreachable ();
6956 if (first_instruction && important_for_bundling_p (insn))
6958 need_barrier = 0;
6959 first_instruction = 0;
6962 return need_barrier;
6965 /* Like group_barrier_needed, but do not clobber the current state. */
6967 static int
6968 safe_group_barrier_needed (rtx_insn *insn)
6970 int saved_first_instruction;
6971 int t;
6973 saved_first_instruction = first_instruction;
6974 in_safe_group_barrier = 1;
6976 t = group_barrier_needed (insn);
6978 first_instruction = saved_first_instruction;
6979 in_safe_group_barrier = 0;
6981 return t;
6984 /* Scan the current function and insert stop bits as necessary to
6985 eliminate dependencies. This function assumes that a final
6986 instruction scheduling pass has been run which has already
6987 inserted most of the necessary stop bits. This function only
6988 inserts new ones at basic block boundaries, since these are
6989 invisible to the scheduler. */
6991 static void
6992 emit_insn_group_barriers (FILE *dump)
6994 rtx_insn *insn;
6995 rtx_insn *last_label = 0;
6996 int insns_since_last_label = 0;
6998 init_insn_group_barriers ();
7000 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7002 if (LABEL_P (insn))
7004 if (insns_since_last_label)
7005 last_label = insn;
7006 insns_since_last_label = 0;
7008 else if (NOTE_P (insn)
7009 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7011 if (insns_since_last_label)
7012 last_label = insn;
7013 insns_since_last_label = 0;
7015 else if (NONJUMP_INSN_P (insn)
7016 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7017 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7019 init_insn_group_barriers ();
7020 last_label = 0;
7022 else if (NONDEBUG_INSN_P (insn))
7024 insns_since_last_label = 1;
7026 if (group_barrier_needed (insn))
7028 if (last_label)
7030 if (dump)
7031 fprintf (dump, "Emitting stop before label %d\n",
7032 INSN_UID (last_label));
7033 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7034 insn = last_label;
7036 init_insn_group_barriers ();
7037 last_label = 0;
7044 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7045 This function has to emit all necessary group barriers. */
7047 static void
7048 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7050 rtx_insn *insn;
7052 init_insn_group_barriers ();
7054 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7056 if (BARRIER_P (insn))
7058 rtx_insn *last = prev_active_insn (insn);
7060 if (! last)
7061 continue;
7062 if (JUMP_TABLE_DATA_P (last))
7063 last = prev_active_insn (last);
7064 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7065 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7067 init_insn_group_barriers ();
7069 else if (NONDEBUG_INSN_P (insn))
7071 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7072 init_insn_group_barriers ();
7073 else if (group_barrier_needed (insn))
7075 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7076 init_insn_group_barriers ();
7077 group_barrier_needed (insn);
7085 /* Instruction scheduling support. */
7087 #define NR_BUNDLES 10
7089 /* A list of names of all available bundles. */
7091 static const char *bundle_name [NR_BUNDLES] =
7093 ".mii",
7094 ".mmi",
7095 ".mfi",
7096 ".mmf",
7097 #if NR_BUNDLES == 10
7098 ".bbb",
7099 ".mbb",
7100 #endif
7101 ".mib",
7102 ".mmb",
7103 ".mfb",
7104 ".mlx"
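/* Each entry is the assembler pseudo-op for one of the ten Itanium bundle
   templates; the letters name the three syllables: M (memory), I (integer),
   F (floating point), B (branch), and LX (a movl/brl occupying two slots).
   ".mfi", for instance, bundles a memory op, an FP op and an integer op.  */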
7107 /* Nonzero if we should insert stop bits into the schedule. */
7109 int ia64_final_schedule = 0;
7111 /* Codes of the corresponding queried units: */
7113 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7114 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7116 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7117 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7119 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7121 /* The following variable value is an insn group barrier. */
7123 static rtx_insn *dfa_stop_insn;
7125 /* The following variable value is the last issued insn. */
7127 static rtx_insn *last_scheduled_insn;
7129 /* The following variable value is pointer to a DFA state used as
7130 temporary variable. */
7132 static state_t temp_dfa_state = NULL;
7134 /* The following variable value is DFA state after issuing the last
7135 insn. */
7137 static state_t prev_cycle_state = NULL;
7139 /* The following array element values are TRUE if the corresponding
7140 insn requires to add stop bits before it. */
7142 static char *stops_p = NULL;
7144 /* The following variable is used to set up the array mentioned above. */
7146 static int stop_before_p = 0;
7148 /* The following variable value is the length of the arrays `clocks' and
7149 `add_cycles'. */
7151 static int clocks_length;
7153 /* The following variable value is number of data speculations in progress. */
7154 static int pending_data_specs = 0;
7156 /* Number of memory references on current and three future processor cycles. */
7157 static char mem_ops_in_group[4];
7159 /* Number of current processor cycle (from scheduler's point of view). */
7160 static int current_cycle;
7162 static rtx ia64_single_set (rtx_insn *);
7163 static void ia64_emit_insn_before (rtx, rtx);
7165 /* Map a bundle number to its pseudo-op. */
7167 const char *
7168 get_bundle_name (int b)
7170 return bundle_name[b];
7174 /* Return the maximum number of instructions a cpu can issue. */
7176 static int
7177 ia64_issue_rate (void)
7179 return 6;
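/* Six corresponds to the two three-instruction bundles an Itanium core can
   issue per clock cycle.  */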
7182 /* Helper function - like single_set, but look inside COND_EXEC. */
7184 static rtx
7185 ia64_single_set (rtx_insn *insn)
7187 rtx x = PATTERN (insn), ret;
7188 if (GET_CODE (x) == COND_EXEC)
7189 x = COND_EXEC_CODE (x);
7190 if (GET_CODE (x) == SET)
7191 return x;
7193 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7194 Although they are not classical single set, the second set is there just
7195 to protect it from moving past FP-relative stack accesses. */
7196 switch (recog_memoized (insn))
7198 case CODE_FOR_prologue_allocate_stack:
7199 case CODE_FOR_prologue_allocate_stack_pr:
7200 case CODE_FOR_epilogue_deallocate_stack:
7201 case CODE_FOR_epilogue_deallocate_stack_pr:
7202 ret = XVECEXP (x, 0, 0);
7203 break;
7205 default:
7206 ret = single_set_2 (insn, x);
7207 break;
7210 return ret;
7213 /* Adjust the cost of a scheduling dependency.
7214 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7215 COST is the current cost, DW is dependency weakness. */
7216 static int
7217 ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7218 int cost, dw_t dw)
7220 enum reg_note dep_type = (enum reg_note) dep_type1;
7221 enum attr_itanium_class dep_class;
7222 enum attr_itanium_class insn_class;
7224 insn_class = ia64_safe_itanium_class (insn);
7225 dep_class = ia64_safe_itanium_class (dep_insn);
7227 /* Treat true memory dependencies separately. Ignore apparent true
7228 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7229 if (dep_type == REG_DEP_TRUE
7230 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7231 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7232 return 0;
7234 if (dw == MIN_DEP_WEAK)
7235 /* Store and load are likely to alias; use a higher cost to avoid a stall. */
7236 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7237 else if (dw > MIN_DEP_WEAK)
7239 /* Store and load are less likely to alias. */
7240 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7241 /* Assume there will be no cache conflict for floating-point data.
7242 For integer data, L1 conflict penalty is huge (17 cycles), so we
7243 never assume it will not cause a conflict. */
7244 return 0;
7245 else
7246 return cost;
7249 if (dep_type != REG_DEP_OUTPUT)
7250 return cost;
7252 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7253 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7254 return 0;
7256 return cost;
7259 /* Like emit_insn_before, but skip cycle_display notes.
7260 ??? When cycle display notes are implemented, update this. */
7262 static void
7263 ia64_emit_insn_before (rtx insn, rtx before)
7265 emit_insn_before (insn, before);
7268 /* The following function marks insns that produce addresses for load
7269 and store insns. Such insns will be placed into M slots because that
7270 decreases latency for Itanium 1 (see function
7271 `ia64_produce_address_p' and the DFA descriptions). */
7273 static void
7274 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7276 rtx_insn *insn, *next, *next_tail;
7278 /* Before reload, which_alternative is not set, which means that
7279 ia64_safe_itanium_class will produce wrong results for (at least)
7280 move instructions. */
7281 if (!reload_completed)
7282 return;
7284 next_tail = NEXT_INSN (tail);
7285 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7286 if (INSN_P (insn))
7287 insn->call = 0;
7288 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7289 if (INSN_P (insn)
7290 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7292 sd_iterator_def sd_it;
7293 dep_t dep;
7294 bool has_mem_op_consumer_p = false;
7296 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7298 enum attr_itanium_class c;
7300 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7301 continue;
7303 next = DEP_CON (dep);
7304 c = ia64_safe_itanium_class (next);
7305 if ((c == ITANIUM_CLASS_ST
7306 || c == ITANIUM_CLASS_STF)
7307 && ia64_st_address_bypass_p (insn, next))
7309 has_mem_op_consumer_p = true;
7310 break;
7312 else if ((c == ITANIUM_CLASS_LD
7313 || c == ITANIUM_CLASS_FLD
7314 || c == ITANIUM_CLASS_FLDP)
7315 && ia64_ld_address_bypass_p (insn, next))
7317 has_mem_op_consumer_p = true;
7318 break;
7322 insn->call = has_mem_op_consumer_p;
7326 /* We're beginning a new block. Initialize data structures as necessary. */
7328 static void
7329 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7330 int sched_verbose ATTRIBUTE_UNUSED,
7331 int max_ready ATTRIBUTE_UNUSED)
7333 #ifdef ENABLE_CHECKING
7334 rtx_insn *insn;
7336 if (!sel_sched_p () && reload_completed)
7337 for (insn = NEXT_INSN (current_sched_info->prev_head);
7338 insn != current_sched_info->next_tail;
7339 insn = NEXT_INSN (insn))
7340 gcc_assert (!SCHED_GROUP_P (insn));
7341 #endif
7342 last_scheduled_insn = NULL;
7343 init_insn_group_barriers ();
7345 current_cycle = 0;
7346 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7349 /* We're beginning a scheduling pass. Check assertion. */
7351 static void
7352 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7353 int sched_verbose ATTRIBUTE_UNUSED,
7354 int max_ready ATTRIBUTE_UNUSED)
7356 gcc_assert (pending_data_specs == 0);
7359 /* Scheduling pass is now finished. Free/reset static variable. */
7360 static void
7361 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7362 int sched_verbose ATTRIBUTE_UNUSED)
7364 gcc_assert (pending_data_specs == 0);
7367 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7368 speculation check), FALSE otherwise. */
7369 static bool
7370 is_load_p (rtx_insn *insn)
7372 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7374 return
7375 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7376 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7379 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7380 (taking into account the 3-cycle cache reference postponing for stores: Intel
7381 Itanium 2 Reference Manual for Software Development and Optimization,
7382 6.7.3.1). */
7383 static void
7384 record_memory_reference (rtx_insn *insn)
7386 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7388 switch (insn_class) {
7389 case ITANIUM_CLASS_FLD:
7390 case ITANIUM_CLASS_LD:
7391 mem_ops_in_group[current_cycle % 4]++;
7392 break;
7393 case ITANIUM_CLASS_STF:
7394 case ITANIUM_CLASS_ST:
7395 mem_ops_in_group[(current_cycle + 3) % 4]++;
7396 break;
7397 default:;
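/* Worked example (illustrative): a store issued on cycle 5 is counted in
   mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, the same slot used by loads
   issued on cycle 8, which models the roughly 3-cycle delay before the
   store's cache reference takes effect.  */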
7401 /* We are about to begin issuing insns for this clock cycle.
7402 Override the default sort algorithm to better slot instructions. */
7404 static int
7405 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7406 int *pn_ready, int clock_var,
7407 int reorder_type)
7409 int n_asms;
7410 int n_ready = *pn_ready;
7411 rtx_insn **e_ready = ready + n_ready;
7412 rtx_insn **insnp;
7414 if (sched_verbose)
7415 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7417 if (reorder_type == 0)
7419 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7420 n_asms = 0;
7421 for (insnp = ready; insnp < e_ready; insnp++)
7422 if (insnp < e_ready)
7424 rtx_insn *insn = *insnp;
7425 enum attr_type t = ia64_safe_type (insn);
7426 if (t == TYPE_UNKNOWN)
7428 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7429 || asm_noperands (PATTERN (insn)) >= 0)
7431 rtx_insn *lowest = ready[n_asms];
7432 ready[n_asms] = insn;
7433 *insnp = lowest;
7434 n_asms++;
7436 else
7438 rtx_insn *highest = ready[n_ready - 1];
7439 ready[n_ready - 1] = insn;
7440 *insnp = highest;
7441 return 1;
7446 if (n_asms < n_ready)
7448 /* Some normal insns to process. Skip the asms. */
7449 ready += n_asms;
7450 n_ready -= n_asms;
7452 else if (n_ready > 0)
7453 return 1;
7456 if (ia64_final_schedule)
7458 int deleted = 0;
7459 int nr_need_stop = 0;
7461 for (insnp = ready; insnp < e_ready; insnp++)
7462 if (safe_group_barrier_needed (*insnp))
7463 nr_need_stop++;
7465 if (reorder_type == 1 && n_ready == nr_need_stop)
7466 return 0;
7467 if (reorder_type == 0)
7468 return 1;
7469 insnp = e_ready;
7470 /* Move down everything that needs a stop bit, preserving
7471 relative order. */
7472 while (insnp-- > ready + deleted)
7473 while (insnp >= ready + deleted)
7475 rtx_insn *insn = *insnp;
7476 if (! safe_group_barrier_needed (insn))
7477 break;
7478 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7479 *ready = insn;
7480 deleted++;
7482 n_ready -= deleted;
7483 ready += deleted;
7486 current_cycle = clock_var;
7487 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7489 int moved = 0;
7491 insnp = e_ready;
7492 /* Move down loads/stores, preserving relative order. */
7493 while (insnp-- > ready + moved)
7494 while (insnp >= ready + moved)
7496 rtx_insn *insn = *insnp;
7497 if (! is_load_p (insn))
7498 break;
7499 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7500 *ready = insn;
7501 moved++;
7503 n_ready -= moved;
7504 ready += moved;
7507 return 1;
7510 /* We are about to begin issuing insns for this clock cycle. Override
7511 the default sort algorithm to better slot instructions. */
7513 static int
7514 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7515 int *pn_ready, int clock_var)
7517 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7518 pn_ready, clock_var, 0);
7521 /* Like ia64_sched_reorder, but called after issuing each insn.
7522 Override the default sort algorithm to better slot instructions. */
7524 static int
7525 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7526 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7527 int *pn_ready, int clock_var)
7529 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7530 clock_var, 1);
7533 /* We are about to issue INSN. Return the number of insns left on the
7534 ready queue that can be issued this cycle. */
7536 static int
7537 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7538 int sched_verbose ATTRIBUTE_UNUSED,
7539 rtx_insn *insn,
7540 int can_issue_more ATTRIBUTE_UNUSED)
7542 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7543 /* Modulo scheduling does not extend h_i_d when emitting
7544 new instructions. Don't use h_i_d, if we don't have to. */
7546 if (DONE_SPEC (insn) & BEGIN_DATA)
7547 pending_data_specs++;
7548 if (CHECK_SPEC (insn) & BEGIN_DATA)
7549 pending_data_specs--;
7552 if (DEBUG_INSN_P (insn))
7553 return 1;
7555 last_scheduled_insn = insn;
7556 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7557 if (reload_completed)
7559 int needed = group_barrier_needed (insn);
7561 gcc_assert (!needed);
7562 if (CALL_P (insn))
7563 init_insn_group_barriers ();
7564 stops_p [INSN_UID (insn)] = stop_before_p;
7565 stop_before_p = 0;
7567 record_memory_reference (insn);
7569 return 1;
7572 /* We are choosing insn from the ready queue. Return zero if INSN
7573 can be chosen. */
7575 static int
7576 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7578 gcc_assert (insn && INSN_P (insn));
7580 /* The ALAT has 32 entries. Since we perform conservative
7581 data speculation, we keep the ALAT half empty. */
7582 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7583 return ready_index == 0 ? -1 : 1;
7585 if (ready_index == 0)
7586 return 0;
7588 if ((!reload_completed
7589 || !safe_group_barrier_needed (insn))
7590 && (!mflag_sched_mem_insns_hard_limit
7591 || !is_load_p (insn)
7592 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7593 return 0;
7595 return 1;
7598 /* The following variable value is pseudo-insn used by the DFA insn
7599 scheduler to change the DFA state when the simulated clock is
7600 increased. */
7602 static rtx_insn *dfa_pre_cycle_insn;
7604 /* Returns 1 when a meaningful insn was scheduled between the last group
7605 barrier and LAST. */
7606 static int
7607 scheduled_good_insn (rtx_insn *last)
7609 if (last && recog_memoized (last) >= 0)
7610 return 1;
7612 for ( ;
7613 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7614 && !stops_p[INSN_UID (last)];
7615 last = PREV_INSN (last))
7616 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7617 the ebb we're scheduling. */
7618 if (INSN_P (last) && recog_memoized (last) >= 0)
7619 return 1;
7621 return 0;
7624 /* We are about to begin issuing INSN.  Return nonzero if we cannot
7625 issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
7626 should not be sorted on the next clock start. */
7628 static int
7629 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7630 int clock, int *sort_p)
7632 gcc_assert (insn && INSN_P (insn));
7634 if (DEBUG_INSN_P (insn))
7635 return 0;
7637 /* When a group barrier is needed for insn, last_scheduled_insn
7638 should be set. */
7639 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7640 || last_scheduled_insn);
7642 if ((reload_completed
7643 && (safe_group_barrier_needed (insn)
7644 || (mflag_sched_stop_bits_after_every_cycle
7645 && last_clock != clock
7646 && last_scheduled_insn
7647 && scheduled_good_insn (last_scheduled_insn))))
7648 || (last_scheduled_insn
7649 && (CALL_P (last_scheduled_insn)
7650 || unknown_for_bundling_p (last_scheduled_insn))))
7652 init_insn_group_barriers ();
7654 if (verbose && dump)
7655 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7656 last_clock == clock ? " + cycle advance" : "");
7658 stop_before_p = 1;
7659 current_cycle = clock;
7660 mem_ops_in_group[current_cycle % 4] = 0;
7662 if (last_clock == clock)
7664 state_transition (curr_state, dfa_stop_insn);
7665 if (TARGET_EARLY_STOP_BITS)
7666 *sort_p = (last_scheduled_insn == NULL_RTX
7667 || ! CALL_P (last_scheduled_insn));
7668 else
7669 *sort_p = 0;
7670 return 1;
7673 if (last_scheduled_insn)
7675 if (unknown_for_bundling_p (last_scheduled_insn))
7676 state_reset (curr_state);
7677 else
7679 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7680 state_transition (curr_state, dfa_stop_insn);
7681 state_transition (curr_state, dfa_pre_cycle_insn);
7682 state_transition (curr_state, NULL);
7686 return 0;
7689 /* Implement targetm.sched.h_i_d_extended hook.
7690 Extend internal data structures. */
7691 static void
7692 ia64_h_i_d_extended (void)
7694 if (stops_p != NULL)
7696 int new_clocks_length = get_max_uid () * 3 / 2;
7697 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7698 clocks_length = new_clocks_length;
7703 /* This structure describes the data used by the backend to guide scheduling.
7704 When the current scheduling point is switched, this data should be saved
7705 and restored later, if the scheduler returns to this point. */
7706 struct _ia64_sched_context
7708 state_t prev_cycle_state;
7709 rtx_insn *last_scheduled_insn;
7710 struct reg_write_state rws_sum[NUM_REGS];
7711 struct reg_write_state rws_insn[NUM_REGS];
7712 int first_instruction;
7713 int pending_data_specs;
7714 int current_cycle;
7715 char mem_ops_in_group[4];
7717 typedef struct _ia64_sched_context *ia64_sched_context_t;
7719 /* Allocates a scheduling context. */
7720 static void *
7721 ia64_alloc_sched_context (void)
7723 return xmalloc (sizeof (struct _ia64_sched_context));
7726 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7727 the global context otherwise. */
7728 static void
7729 ia64_init_sched_context (void *_sc, bool clean_p)
7731 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7733 sc->prev_cycle_state = xmalloc (dfa_state_size);
7734 if (clean_p)
7736 state_reset (sc->prev_cycle_state);
7737 sc->last_scheduled_insn = NULL;
7738 memset (sc->rws_sum, 0, sizeof (rws_sum));
7739 memset (sc->rws_insn, 0, sizeof (rws_insn));
7740 sc->first_instruction = 1;
7741 sc->pending_data_specs = 0;
7742 sc->current_cycle = 0;
7743 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7745 else
7747 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7748 sc->last_scheduled_insn = last_scheduled_insn;
7749 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7750 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7751 sc->first_instruction = first_instruction;
7752 sc->pending_data_specs = pending_data_specs;
7753 sc->current_cycle = current_cycle;
7754 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7758 /* Sets the global scheduling context to the one pointed to by _SC. */
7759 static void
7760 ia64_set_sched_context (void *_sc)
7762 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7764 gcc_assert (sc != NULL);
7766 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7767 last_scheduled_insn = sc->last_scheduled_insn;
7768 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7769 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7770 first_instruction = sc->first_instruction;
7771 pending_data_specs = sc->pending_data_specs;
7772 current_cycle = sc->current_cycle;
7773 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7776 /* Clears the data in the _SC scheduling context. */
7777 static void
7778 ia64_clear_sched_context (void *_sc)
7780 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7782 free (sc->prev_cycle_state);
7783 sc->prev_cycle_state = NULL;
7786 /* Frees the _SC scheduling context. */
7787 static void
7788 ia64_free_sched_context (void *_sc)
7790 gcc_assert (_sc != NULL);
7792 free (_sc);
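/* A minimal illustrative sketch (not compiled) of how the selective
   scheduler is assumed to drive the context hooks above when it switches
   between scheduling points; the exact call sequence belongs to the
   scheduler, this only restates the intended lifecycle.  */
#if 0
  void *ctx = ia64_alloc_sched_context ();
  ia64_init_sched_context (ctx, false);  /* Snapshot the global state.  */
  /* ... schedule at some other point, clobbering the globals ...  */
  ia64_set_sched_context (ctx);          /* Restore the snapshot.  */
  ia64_clear_sched_context (ctx);        /* Release prev_cycle_state.  */
  ia64_free_sched_context (ctx);
#endif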
7795 typedef rtx (* gen_func_t) (rtx, rtx);
7797 /* Return a function that will generate a load of mode MODE_NO
7798 with speculation types TS. */
7799 static gen_func_t
7800 get_spec_load_gen_function (ds_t ts, int mode_no)
7802 static gen_func_t gen_ld_[] = {
7803 gen_movbi,
7804 gen_movqi_internal,
7805 gen_movhi_internal,
7806 gen_movsi_internal,
7807 gen_movdi_internal,
7808 gen_movsf_internal,
7809 gen_movdf_internal,
7810 gen_movxf_internal,
7811 gen_movti_internal,
7812 gen_zero_extendqidi2,
7813 gen_zero_extendhidi2,
7814 gen_zero_extendsidi2,
7817 static gen_func_t gen_ld_a[] = {
7818 gen_movbi_advanced,
7819 gen_movqi_advanced,
7820 gen_movhi_advanced,
7821 gen_movsi_advanced,
7822 gen_movdi_advanced,
7823 gen_movsf_advanced,
7824 gen_movdf_advanced,
7825 gen_movxf_advanced,
7826 gen_movti_advanced,
7827 gen_zero_extendqidi2_advanced,
7828 gen_zero_extendhidi2_advanced,
7829 gen_zero_extendsidi2_advanced,
7831 static gen_func_t gen_ld_s[] = {
7832 gen_movbi_speculative,
7833 gen_movqi_speculative,
7834 gen_movhi_speculative,
7835 gen_movsi_speculative,
7836 gen_movdi_speculative,
7837 gen_movsf_speculative,
7838 gen_movdf_speculative,
7839 gen_movxf_speculative,
7840 gen_movti_speculative,
7841 gen_zero_extendqidi2_speculative,
7842 gen_zero_extendhidi2_speculative,
7843 gen_zero_extendsidi2_speculative,
7845 static gen_func_t gen_ld_sa[] = {
7846 gen_movbi_speculative_advanced,
7847 gen_movqi_speculative_advanced,
7848 gen_movhi_speculative_advanced,
7849 gen_movsi_speculative_advanced,
7850 gen_movdi_speculative_advanced,
7851 gen_movsf_speculative_advanced,
7852 gen_movdf_speculative_advanced,
7853 gen_movxf_speculative_advanced,
7854 gen_movti_speculative_advanced,
7855 gen_zero_extendqidi2_speculative_advanced,
7856 gen_zero_extendhidi2_speculative_advanced,
7857 gen_zero_extendsidi2_speculative_advanced,
7859 static gen_func_t gen_ld_s_a[] = {
7860 gen_movbi_speculative_a,
7861 gen_movqi_speculative_a,
7862 gen_movhi_speculative_a,
7863 gen_movsi_speculative_a,
7864 gen_movdi_speculative_a,
7865 gen_movsf_speculative_a,
7866 gen_movdf_speculative_a,
7867 gen_movxf_speculative_a,
7868 gen_movti_speculative_a,
7869 gen_zero_extendqidi2_speculative_a,
7870 gen_zero_extendhidi2_speculative_a,
7871 gen_zero_extendsidi2_speculative_a,
7874 gen_func_t *gen_ld;
7876 if (ts & BEGIN_DATA)
7878 if (ts & BEGIN_CONTROL)
7879 gen_ld = gen_ld_sa;
7880 else
7881 gen_ld = gen_ld_a;
7883 else if (ts & BEGIN_CONTROL)
7885 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7886 || ia64_needs_block_p (ts))
7887 gen_ld = gen_ld_s;
7888 else
7889 gen_ld = gen_ld_s_a;
7891 else if (ts == 0)
7892 gen_ld = gen_ld_;
7893 else
7894 gcc_unreachable ();
7896 return gen_ld[mode_no];
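/* A hypothetical use of the dispatch above (illustrative only, not
   compiled): mode index 4 is DImode per ia64_mode_to_int below, so a
   purely data-speculative DImode load would be generated by
   gen_movdi_advanced (an ld.a).  */
#if 0
  gen_func_t gen = get_spec_load_gen_function (BEGIN_DATA, 4 /* DImode */);
  /* gen == gen_movdi_advanced here.  */
#endif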
7899 /* Constants that help map 'machine_mode' to an int. */
7900 enum SPEC_MODES
7902 SPEC_MODE_INVALID = -1,
7903 SPEC_MODE_FIRST = 0,
7904 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7905 SPEC_MODE_FOR_EXTEND_LAST = 3,
7906 SPEC_MODE_LAST = 8
7909 enum
7911 /* Offset to reach ZERO_EXTEND patterns. */
7912 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7915 /* Return index of the MODE. */
7916 static int
7917 ia64_mode_to_int (machine_mode mode)
7919 switch (mode)
7921 case BImode: return 0; /* SPEC_MODE_FIRST */
7922 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7923 case HImode: return 2;
7924 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7925 case DImode: return 4;
7926 case SFmode: return 5;
7927 case DFmode: return 6;
7928 case XFmode: return 7;
7929 case TImode:
7930 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7931 mentioned in itanium[12].md. Predicate fp_register_operand also
7932 needs to be defined. Bottom line: better disable for now. */
7933 return SPEC_MODE_INVALID;
7934 default: return SPEC_MODE_INVALID;
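/* Worked example of the index arithmetic: with the mapping above,
   SPEC_GEN_EXTEND_OFFSET is SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
   = 8 - 1 + 1 = 8.  A QImode load therefore gets index 1, and if it is
   wrapped in a ZERO_EXTEND, get_mode_no_for_insn below adds the offset,
   giving 1 + 8 = 9, which selects the gen_zero_extendqidi2* entries in the
   generator tables above.  */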
7938 /* Provide information about speculation capabilities. */
7939 static void
7940 ia64_set_sched_flags (spec_info_t spec_info)
7942 unsigned int *flags = &(current_sched_info->flags);
7944 if (*flags & SCHED_RGN
7945 || *flags & SCHED_EBB
7946 || *flags & SEL_SCHED)
7948 int mask = 0;
7950 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7951 || (mflag_sched_ar_data_spec && reload_completed))
7953 mask |= BEGIN_DATA;
7955 if (!sel_sched_p ()
7956 && ((mflag_sched_br_in_data_spec && !reload_completed)
7957 || (mflag_sched_ar_in_data_spec && reload_completed)))
7958 mask |= BE_IN_DATA;
7961 if (mflag_sched_control_spec
7962 && (!sel_sched_p ()
7963 || reload_completed))
7965 mask |= BEGIN_CONTROL;
7967 if (!sel_sched_p () && mflag_sched_in_control_spec)
7968 mask |= BE_IN_CONTROL;
7971 spec_info->mask = mask;
7973 if (mask)
7975 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7977 if (mask & BE_IN_SPEC)
7978 *flags |= NEW_BBS;
7980 spec_info->flags = 0;
7982 if ((mask & CONTROL_SPEC)
7983 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7984 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7986 if (sched_verbose >= 1)
7987 spec_info->dump = sched_dump;
7988 else
7989 spec_info->dump = 0;
7991 if (mflag_sched_count_spec_in_critical_path)
7992 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7995 else
7996 spec_info->mask = 0;
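/* Example reading of the code above (a summary, not an exhaustive option
   reference): with mflag_sched_br_data_spec set before reload (and
   optimization enabled), BEGIN_DATA is added to the mask; with
   mflag_sched_control_spec, BEGIN_CONTROL is added as well.  Any nonzero
   mask then turns on USE_DEPS_LIST and DO_SPECULATION, and a BE_IN_* bit
   additionally requests NEW_BBS from the scheduler.  */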
7999 /* If INSN is an appropriate load, return the index of its mode
8000 (possibly adjusted for ZERO_EXTEND).  Return -1 otherwise. */
8001 static int
8002 get_mode_no_for_insn (rtx_insn *insn)
8004 rtx reg, mem, mode_rtx;
8005 int mode_no;
8006 bool extend_p;
8008 extract_insn_cached (insn);
8010 /* We use WHICH_ALTERNATIVE only after reload. This will
8011 guarantee that reload won't touch a speculative insn. */
8013 if (recog_data.n_operands != 2)
8014 return -1;
8016 reg = recog_data.operand[0];
8017 mem = recog_data.operand[1];
8019 /* We should use MEM's mode since REG's mode in the presence of
8020 ZERO_EXTEND will always be DImode. */
8021 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8022 /* Process non-speculative ld. */
8024 if (!reload_completed)
8026 /* Do not speculate into regs like ar.lc. */
8027 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8028 return -1;
8030 if (!MEM_P (mem))
8031 return -1;
8034 rtx mem_reg = XEXP (mem, 0);
8036 if (!REG_P (mem_reg))
8037 return -1;
8040 mode_rtx = mem;
8042 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8044 gcc_assert (REG_P (reg) && MEM_P (mem));
8045 mode_rtx = mem;
8047 else
8048 return -1;
8050 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8051 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8052 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8053 /* Process speculative ld or ld.c. */
8055 gcc_assert (REG_P (reg) && MEM_P (mem));
8056 mode_rtx = mem;
8058 else
8060 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8062 if (attr_class == ITANIUM_CLASS_CHK_A
8063 || attr_class == ITANIUM_CLASS_CHK_S_I
8064 || attr_class == ITANIUM_CLASS_CHK_S_F)
8065 /* Process chk. */
8066 mode_rtx = reg;
8067 else
8068 return -1;
8071 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8073 if (mode_no == SPEC_MODE_INVALID)
8074 return -1;
8076 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8078 if (extend_p)
8080 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8081 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8082 return -1;
8084 mode_no += SPEC_GEN_EXTEND_OFFSET;
8087 return mode_no;
8090 /* If X is an unspec part of a speculative load, return its code.
8091 Return -1 otherwise. */
8092 static int
8093 get_spec_unspec_code (const_rtx x)
8095 if (GET_CODE (x) != UNSPEC)
8096 return -1;
8099 int code;
8101 code = XINT (x, 1);
8103 switch (code)
8105 case UNSPEC_LDA:
8106 case UNSPEC_LDS:
8107 case UNSPEC_LDS_A:
8108 case UNSPEC_LDSA:
8109 return code;
8111 default:
8112 return -1;
8117 /* Implement skip_rtx_p hook. */
8118 static bool
8119 ia64_skip_rtx_p (const_rtx x)
8121 return get_spec_unspec_code (x) != -1;
8124 /* If INSN is a speculative load, return its UNSPEC code.
8125 Return -1 otherwise. */
8126 static int
8127 get_insn_spec_code (const_rtx insn)
8129 rtx pat, reg, mem;
8131 pat = PATTERN (insn);
8133 if (GET_CODE (pat) == COND_EXEC)
8134 pat = COND_EXEC_CODE (pat);
8136 if (GET_CODE (pat) != SET)
8137 return -1;
8139 reg = SET_DEST (pat);
8140 if (!REG_P (reg))
8141 return -1;
8143 mem = SET_SRC (pat);
8144 if (GET_CODE (mem) == ZERO_EXTEND)
8145 mem = XEXP (mem, 0);
8147 return get_spec_unspec_code (mem);
8150 /* If INSN is a speculative load, return a ds with the speculation types.
8151 Otherwise [if INSN is a normal instruction] return 0. */
8152 static ds_t
8153 ia64_get_insn_spec_ds (rtx_insn *insn)
8155 int code = get_insn_spec_code (insn);
8157 switch (code)
8159 case UNSPEC_LDA:
8160 return BEGIN_DATA;
8162 case UNSPEC_LDS:
8163 case UNSPEC_LDS_A:
8164 return BEGIN_CONTROL;
8166 case UNSPEC_LDSA:
8167 return BEGIN_DATA | BEGIN_CONTROL;
8169 default:
8170 return 0;
8174 /* If INSN is a speculative load return a ds with the speculation types that
8175 will be checked.
8176 Otherwise [if INSN is a normal instruction] return 0. */
8177 static ds_t
8178 ia64_get_insn_checked_ds (rtx_insn *insn)
8180 int code = get_insn_spec_code (insn);
8182 switch (code)
8184 case UNSPEC_LDA:
8185 return BEGIN_DATA | BEGIN_CONTROL;
8187 case UNSPEC_LDS:
8188 return BEGIN_CONTROL;
8190 case UNSPEC_LDS_A:
8191 case UNSPEC_LDSA:
8192 return BEGIN_DATA | BEGIN_CONTROL;
8194 default:
8195 return 0;
8199 /* Return a speculative pattern for INSN with speculation type TS and
8200 machine mode index MODE_NO.  The new pattern keeps the COND_EXEC
8201 wrapper of the original pattern, if any. */
8203 static rtx
8204 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8206 rtx pat, new_pat;
8207 gen_func_t gen_load;
8209 gen_load = get_spec_load_gen_function (ts, mode_no);
8211 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8212 copy_rtx (recog_data.operand[1]));
8214 pat = PATTERN (insn);
8215 if (GET_CODE (pat) == COND_EXEC)
8216 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8217 new_pat);
8219 return new_pat;
8222 static bool
8223 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8224 ds_t ds ATTRIBUTE_UNUSED)
8226 return false;
8229 /* Implement targetm.sched.speculate_insn hook.
8230 Check if the INSN can be TS speculative.
8231 If 'no' - return -1.
8232 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8233 If current pattern of the INSN already provides TS speculation,
8234 return 0. */
8235 static int
8236 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8238 int mode_no;
8239 int res;
8241 gcc_assert (!(ts & ~SPECULATIVE));
8243 if (ia64_spec_check_p (insn))
8244 return -1;
8246 if ((ts & BE_IN_SPEC)
8247 && !insn_can_be_in_speculative_p (insn, ts))
8248 return -1;
8250 mode_no = get_mode_no_for_insn (insn);
8252 if (mode_no != SPEC_MODE_INVALID)
8254 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8255 res = 0;
8256 else
8258 res = 1;
8259 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8262 else
8263 res = -1;
8265 return res;
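/* A minimal sketch (hypothetical caller, not compiled) of the -1/0/1
   contract documented before ia64_speculate_insn above.  */
#if 0
  rtx new_pat;
  int r = ia64_speculate_insn (insn, BEGIN_DATA, &new_pat);
  if (r > 0)
    ; /* Replace PATTERN (insn) with new_pat to get the ld.a form.  */
  else if (r == 0)
    ; /* INSN already provides the requested speculation.  */
  else
    ; /* INSN cannot be made speculative in this way.  */
#endif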
8268 /* Return a function that will generate a check for speculation TS with mode
8269 MODE_NO.
8270 If simple check is needed, pass true for SIMPLE_CHECK_P.
8271 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8272 static gen_func_t
8273 get_spec_check_gen_function (ds_t ts, int mode_no,
8274 bool simple_check_p, bool clearing_check_p)
8276 static gen_func_t gen_ld_c_clr[] = {
8277 gen_movbi_clr,
8278 gen_movqi_clr,
8279 gen_movhi_clr,
8280 gen_movsi_clr,
8281 gen_movdi_clr,
8282 gen_movsf_clr,
8283 gen_movdf_clr,
8284 gen_movxf_clr,
8285 gen_movti_clr,
8286 gen_zero_extendqidi2_clr,
8287 gen_zero_extendhidi2_clr,
8288 gen_zero_extendsidi2_clr,
8290 static gen_func_t gen_ld_c_nc[] = {
8291 gen_movbi_nc,
8292 gen_movqi_nc,
8293 gen_movhi_nc,
8294 gen_movsi_nc,
8295 gen_movdi_nc,
8296 gen_movsf_nc,
8297 gen_movdf_nc,
8298 gen_movxf_nc,
8299 gen_movti_nc,
8300 gen_zero_extendqidi2_nc,
8301 gen_zero_extendhidi2_nc,
8302 gen_zero_extendsidi2_nc,
8304 static gen_func_t gen_chk_a_clr[] = {
8305 gen_advanced_load_check_clr_bi,
8306 gen_advanced_load_check_clr_qi,
8307 gen_advanced_load_check_clr_hi,
8308 gen_advanced_load_check_clr_si,
8309 gen_advanced_load_check_clr_di,
8310 gen_advanced_load_check_clr_sf,
8311 gen_advanced_load_check_clr_df,
8312 gen_advanced_load_check_clr_xf,
8313 gen_advanced_load_check_clr_ti,
8314 gen_advanced_load_check_clr_di,
8315 gen_advanced_load_check_clr_di,
8316 gen_advanced_load_check_clr_di,
8318 static gen_func_t gen_chk_a_nc[] = {
8319 gen_advanced_load_check_nc_bi,
8320 gen_advanced_load_check_nc_qi,
8321 gen_advanced_load_check_nc_hi,
8322 gen_advanced_load_check_nc_si,
8323 gen_advanced_load_check_nc_di,
8324 gen_advanced_load_check_nc_sf,
8325 gen_advanced_load_check_nc_df,
8326 gen_advanced_load_check_nc_xf,
8327 gen_advanced_load_check_nc_ti,
8328 gen_advanced_load_check_nc_di,
8329 gen_advanced_load_check_nc_di,
8330 gen_advanced_load_check_nc_di,
8332 static gen_func_t gen_chk_s[] = {
8333 gen_speculation_check_bi,
8334 gen_speculation_check_qi,
8335 gen_speculation_check_hi,
8336 gen_speculation_check_si,
8337 gen_speculation_check_di,
8338 gen_speculation_check_sf,
8339 gen_speculation_check_df,
8340 gen_speculation_check_xf,
8341 gen_speculation_check_ti,
8342 gen_speculation_check_di,
8343 gen_speculation_check_di,
8344 gen_speculation_check_di,
8347 gen_func_t *gen_check;
8349 if (ts & BEGIN_DATA)
8351 /* We don't need recovery because even if this is an ld.sa, the
8352 ALAT entry will be allocated only if the NAT bit is set to zero.
8353 So it is enough to use ld.c here. */
8355 if (simple_check_p)
8357 gcc_assert (mflag_sched_spec_ldc);
8359 if (clearing_check_p)
8360 gen_check = gen_ld_c_clr;
8361 else
8362 gen_check = gen_ld_c_nc;
8364 else
8366 if (clearing_check_p)
8367 gen_check = gen_chk_a_clr;
8368 else
8369 gen_check = gen_chk_a_nc;
8372 else if (ts & BEGIN_CONTROL)
8374 if (simple_check_p)
8375 /* We might want to use ld.sa -> ld.c instead of
8376 ld.s -> chk.s. */
8378 gcc_assert (!ia64_needs_block_p (ts));
8380 if (clearing_check_p)
8381 gen_check = gen_ld_c_clr;
8382 else
8383 gen_check = gen_ld_c_nc;
8385 else
8387 gen_check = gen_chk_s;
8390 else
8391 gcc_unreachable ();
8393 gcc_assert (mode_no >= 0);
8394 return gen_check[mode_no];
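/* A hypothetical use of the dispatch above (illustrative only, not
   compiled): assuming mflag_sched_spec_ldc is set, a simple clearing check
   for a DImode data-speculative load is an ld.c.clr generated by
   gen_movdi_clr.  */
#if 0
  gen_func_t chk = get_spec_check_gen_function (BEGIN_DATA, 4 /* DImode */,
						true /* simple */,
						true /* clearing */);
  /* chk == gen_movdi_clr here.  */
#endif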
8397 /* Return nonzero if a speculation check for TS must be a branchy recovery check. */
8398 static bool
8399 ia64_needs_block_p (ds_t ts)
8401 if (ts & BEGIN_DATA)
8402 return !mflag_sched_spec_ldc;
8404 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8406 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8409 /* Generate (or regenerate) a recovery check for INSN. */
8410 static rtx
8411 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8413 rtx op1, pat, check_pat;
8414 gen_func_t gen_check;
8415 int mode_no;
8417 mode_no = get_mode_no_for_insn (insn);
8418 gcc_assert (mode_no >= 0);
8420 if (label)
8421 op1 = label;
8422 else
8424 gcc_assert (!ia64_needs_block_p (ds));
8425 op1 = copy_rtx (recog_data.operand[1]);
8428 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8429 true);
8431 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8433 pat = PATTERN (insn);
8434 if (GET_CODE (pat) == COND_EXEC)
8435 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8436 check_pat);
8438 return check_pat;
8441 /* Return nonzero if X is a branchy recovery check. */
8442 static int
8443 ia64_spec_check_p (rtx x)
8445 x = PATTERN (x);
8446 if (GET_CODE (x) == COND_EXEC)
8447 x = COND_EXEC_CODE (x);
8448 if (GET_CODE (x) == SET)
8449 return ia64_spec_check_src_p (SET_SRC (x));
8450 return 0;
8453 /* Return nonzero if SRC belongs to a recovery check. */
8454 static int
8455 ia64_spec_check_src_p (rtx src)
8457 if (GET_CODE (src) == IF_THEN_ELSE)
8459 rtx t;
8461 t = XEXP (src, 0);
8462 if (GET_CODE (t) == NE)
8464 t = XEXP (t, 0);
8466 if (GET_CODE (t) == UNSPEC)
8468 int code;
8470 code = XINT (t, 1);
8472 if (code == UNSPEC_LDCCLR
8473 || code == UNSPEC_LDCNC
8474 || code == UNSPEC_CHKACLR
8475 || code == UNSPEC_CHKANC
8476 || code == UNSPEC_CHKS)
8478 gcc_assert (code != 0);
8479 return code;
8484 return 0;
8488 /* The following page contains abstract data `bundle states' which are
8489 used for bundling insns (inserting nops and template generation). */
8491 /* The following describes the state of insn bundling. */
8493 struct bundle_state
8495 /* Unique bundle state number to identify them in the debugging
8496 output */
8497 int unique_num;
8498 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8499 /* number of nops before and after the insn */
8500 short before_nops_num, after_nops_num;
8501 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8502 insn) */
8503 int cost; /* cost of the state in cycles */
8504 int accumulated_insns_num; /* number of all previous insns including
8505 nops.  An L insn is counted as 2 insns */
8506 int branch_deviation; /* deviation of previous branches from 3rd slots */
8507 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8508 struct bundle_state *next; /* next state with the same insn_num */
8509 struct bundle_state *originator; /* originator (previous insn state) */
8510 /* All bundle states are in the following chain. */
8511 struct bundle_state *allocated_states_chain;
8512 /* The DFA State after issuing the insn and the nops. */
8513 state_t dfa_state;
8516 /* The following maps an insn number to the corresponding bundle state. */
8518 static struct bundle_state **index_to_bundle_states;
8520 /* The unique number of the next bundle state. */
8522 static int bundle_states_num;
8524 /* All allocated bundle states are in the following chain. */
8526 static struct bundle_state *allocated_bundle_states_chain;
8528 /* All allocated but not used bundle states are in the following
8529 chain. */
8531 static struct bundle_state *free_bundle_state_chain;
8534 /* The following function returns a free bundle state. */
8536 static struct bundle_state *
8537 get_free_bundle_state (void)
8539 struct bundle_state *result;
8541 if (free_bundle_state_chain != NULL)
8543 result = free_bundle_state_chain;
8544 free_bundle_state_chain = result->next;
8546 else
8548 result = XNEW (struct bundle_state);
8549 result->dfa_state = xmalloc (dfa_state_size);
8550 result->allocated_states_chain = allocated_bundle_states_chain;
8551 allocated_bundle_states_chain = result;
8553 result->unique_num = bundle_states_num++;
8554 return result;
8558 /* The following function frees the given bundle state. */
8560 static void
8561 free_bundle_state (struct bundle_state *state)
8563 state->next = free_bundle_state_chain;
8564 free_bundle_state_chain = state;
8567 /* Start work with abstract data `bundle states'. */
8569 static void
8570 initiate_bundle_states (void)
8572 bundle_states_num = 0;
8573 free_bundle_state_chain = NULL;
8574 allocated_bundle_states_chain = NULL;
8577 /* Finish work with abstract data `bundle states'. */
8579 static void
8580 finish_bundle_states (void)
8582 struct bundle_state *curr_state, *next_state;
8584 for (curr_state = allocated_bundle_states_chain;
8585 curr_state != NULL;
8586 curr_state = next_state)
8588 next_state = curr_state->allocated_states_chain;
8589 free (curr_state->dfa_state);
8590 free (curr_state);
8594 /* Hashtable helpers. */
8596 struct bundle_state_hasher : typed_noop_remove <bundle_state>
8598 typedef bundle_state value_type;
8599 typedef bundle_state compare_type;
8600 static inline hashval_t hash (const value_type *);
8601 static inline bool equal (const value_type *, const compare_type *);
8604 /* The function returns hash of BUNDLE_STATE. */
8606 inline hashval_t
8607 bundle_state_hasher::hash (const value_type *state)
8609 unsigned result, i;
8611 for (result = i = 0; i < dfa_state_size; i++)
8612 result += (((unsigned char *) state->dfa_state) [i]
8613 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8614 return result + state->insn_num;
8617 /* The function returns nonzero if the bundle state keys are equal. */
8619 inline bool
8620 bundle_state_hasher::equal (const value_type *state1,
8621 const compare_type *state2)
8623 return (state1->insn_num == state2->insn_num
8624 && memcmp (state1->dfa_state, state2->dfa_state,
8625 dfa_state_size) == 0);
8628 /* Hash table of the bundle states. The key is dfa_state and insn_num
8629 of the bundle states. */
8631 static hash_table<bundle_state_hasher> *bundle_state_table;
8633 /* The function inserts BUNDLE_STATE into the hash table and returns
8634 nonzero if it has been inserted into the table.  The table keeps the
8635 best bundle state with a given key. */
8637 static int
8638 insert_bundle_state (struct bundle_state *bundle_state)
8640 struct bundle_state **entry_ptr;
8642 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8643 if (*entry_ptr == NULL)
8645 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8646 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8647 *entry_ptr = bundle_state;
8648 return TRUE;
8650 else if (bundle_state->cost < (*entry_ptr)->cost
8651 || (bundle_state->cost == (*entry_ptr)->cost
8652 && ((*entry_ptr)->accumulated_insns_num
8653 > bundle_state->accumulated_insns_num
8654 || ((*entry_ptr)->accumulated_insns_num
8655 == bundle_state->accumulated_insns_num
8656 && ((*entry_ptr)->branch_deviation
8657 > bundle_state->branch_deviation
8658 || ((*entry_ptr)->branch_deviation
8659 == bundle_state->branch_deviation
8660 && (*entry_ptr)->middle_bundle_stops
8661 > bundle_state->middle_bundle_stops))))))
8664 struct bundle_state temp;
8666 temp = **entry_ptr;
8667 **entry_ptr = *bundle_state;
8668 (*entry_ptr)->next = temp.next;
8669 *bundle_state = temp;
8671 return FALSE;
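/* The nested condition above is just a lexicographic "is the new state
   better" test on the tuple (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops).  An equivalent sketch, for readability only (not
   compiled):  */
#if 0
static bool
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif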
8674 /* Start work with the hash table. */
8676 static void
8677 initiate_bundle_state_table (void)
8679 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8682 /* Finish work with the hash table. */
8684 static void
8685 finish_bundle_state_table (void)
8687 delete bundle_state_table;
8688 bundle_state_table = NULL;
8693 /* The following variable is an insn `nop' used to check bundle states
8694 with different numbers of inserted nops. */
8696 static rtx_insn *ia64_nop;
8698 /* The following function tries to issue NOPS_NUM nops for the current
8699 state without advancing the processor cycle.  If it fails, the
8700 function returns FALSE and frees the current state. */
8702 static int
8703 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8705 int i;
8707 for (i = 0; i < nops_num; i++)
8708 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8710 free_bundle_state (curr_state);
8711 return FALSE;
8713 return TRUE;
8716 /* The following function tries to issue INSN for the current
8717 state without advancing the processor cycle.  If it fails, the
8718 function returns FALSE and frees the current state. */
8720 static int
8721 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8723 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8725 free_bundle_state (curr_state);
8726 return FALSE;
8728 return TRUE;
8731 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8732 starting from ORIGINATOR without advancing the processor cycle.  If
8733 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8734 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8735 If it is successful, the function creates a new bundle state and
8736 inserts it into the hash table and into `index_to_bundle_states'. */
8738 static void
8739 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8740 rtx_insn *insn, int try_bundle_end_p,
8741 int only_bundle_end_p)
8743 struct bundle_state *curr_state;
8745 curr_state = get_free_bundle_state ();
8746 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8747 curr_state->insn = insn;
8748 curr_state->insn_num = originator->insn_num + 1;
8749 curr_state->cost = originator->cost;
8750 curr_state->originator = originator;
8751 curr_state->before_nops_num = before_nops_num;
8752 curr_state->after_nops_num = 0;
8753 curr_state->accumulated_insns_num
8754 = originator->accumulated_insns_num + before_nops_num;
8755 curr_state->branch_deviation = originator->branch_deviation;
8756 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8757 gcc_assert (insn);
8758 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8760 gcc_assert (GET_MODE (insn) != TImode);
8761 if (!try_issue_nops (curr_state, before_nops_num))
8762 return;
8763 if (!try_issue_insn (curr_state, insn))
8764 return;
8765 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8766 if (curr_state->accumulated_insns_num % 3 != 0)
8767 curr_state->middle_bundle_stops++;
8768 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8769 && curr_state->accumulated_insns_num % 3 != 0)
8771 free_bundle_state (curr_state);
8772 return;
8775 else if (GET_MODE (insn) != TImode)
8777 if (!try_issue_nops (curr_state, before_nops_num))
8778 return;
8779 if (!try_issue_insn (curr_state, insn))
8780 return;
8781 curr_state->accumulated_insns_num++;
8782 gcc_assert (!unknown_for_bundling_p (insn));
8784 if (ia64_safe_type (insn) == TYPE_L)
8785 curr_state->accumulated_insns_num++;
8787 else
8789 /* If this is an insn that must be first in a group, then don't allow
8790 nops to be emitted before it. Currently, alloc is the only such
8791 supported instruction. */
8792 /* ??? The bundling automatons should handle this for us, but they do
8793 not yet have support for the first_insn attribute. */
8794 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8796 free_bundle_state (curr_state);
8797 return;
8800 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8801 state_transition (curr_state->dfa_state, NULL);
8802 curr_state->cost++;
8803 if (!try_issue_nops (curr_state, before_nops_num))
8804 return;
8805 if (!try_issue_insn (curr_state, insn))
8806 return;
8807 curr_state->accumulated_insns_num++;
8808 if (unknown_for_bundling_p (insn))
8810 /* Finish bundle containing asm insn. */
8811 curr_state->after_nops_num
8812 = 3 - curr_state->accumulated_insns_num % 3;
8813 curr_state->accumulated_insns_num
8814 += 3 - curr_state->accumulated_insns_num % 3;
8816 else if (ia64_safe_type (insn) == TYPE_L)
8817 curr_state->accumulated_insns_num++;
8819 if (ia64_safe_type (insn) == TYPE_B)
8820 curr_state->branch_deviation
8821 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8822 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8824 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8826 state_t dfa_state;
8827 struct bundle_state *curr_state1;
8828 struct bundle_state *allocated_states_chain;
8830 curr_state1 = get_free_bundle_state ();
8831 dfa_state = curr_state1->dfa_state;
8832 allocated_states_chain = curr_state1->allocated_states_chain;
8833 *curr_state1 = *curr_state;
8834 curr_state1->dfa_state = dfa_state;
8835 curr_state1->allocated_states_chain = allocated_states_chain;
8836 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8837 dfa_state_size);
8838 curr_state = curr_state1;
8840 if (!try_issue_nops (curr_state,
8841 3 - curr_state->accumulated_insns_num % 3))
8842 return;
8843 curr_state->after_nops_num
8844 = 3 - curr_state->accumulated_insns_num % 3;
8845 curr_state->accumulated_insns_num
8846 += 3 - curr_state->accumulated_insns_num % 3;
8848 if (!insert_bundle_state (curr_state))
8849 free_bundle_state (curr_state);
8850 return;
8853 /* The following function returns the position in the two-bundle window
8854 for the given STATE. */
8856 static int
8857 get_max_pos (state_t state)
8859 if (cpu_unit_reservation_p (state, pos_6))
8860 return 6;
8861 else if (cpu_unit_reservation_p (state, pos_5))
8862 return 5;
8863 else if (cpu_unit_reservation_p (state, pos_4))
8864 return 4;
8865 else if (cpu_unit_reservation_p (state, pos_3))
8866 return 3;
8867 else if (cpu_unit_reservation_p (state, pos_2))
8868 return 2;
8869 else if (cpu_unit_reservation_p (state, pos_1))
8870 return 1;
8871 else
8872 return 0;
8875 /* The function returns the code of a possible template for the given
8876 position and state.  The function should be called only with the two
8877 position values 3 or 6.  We avoid generating F NOPs by putting
8878 templates containing F insns at the end of the template search,
8879 because of an undocumented anomaly in McKinley-derived cores that can
8880 cause stalls if an F-unit insn (including a NOP) is issued within a
8881 six-cycle window after reading certain application registers (such
8882 as ar.bsp).  Furthermore, power considerations also argue against
8883 the use of F-unit instructions unless they're really needed. */
8885 static int
8886 get_template (state_t state, int pos)
8888 switch (pos)
8890 case 3:
8891 if (cpu_unit_reservation_p (state, _0mmi_))
8892 return 1;
8893 else if (cpu_unit_reservation_p (state, _0mii_))
8894 return 0;
8895 else if (cpu_unit_reservation_p (state, _0mmb_))
8896 return 7;
8897 else if (cpu_unit_reservation_p (state, _0mib_))
8898 return 6;
8899 else if (cpu_unit_reservation_p (state, _0mbb_))
8900 return 5;
8901 else if (cpu_unit_reservation_p (state, _0bbb_))
8902 return 4;
8903 else if (cpu_unit_reservation_p (state, _0mmf_))
8904 return 3;
8905 else if (cpu_unit_reservation_p (state, _0mfi_))
8906 return 2;
8907 else if (cpu_unit_reservation_p (state, _0mfb_))
8908 return 8;
8909 else if (cpu_unit_reservation_p (state, _0mlx_))
8910 return 9;
8911 else
8912 gcc_unreachable ();
8913 case 6:
8914 if (cpu_unit_reservation_p (state, _1mmi_))
8915 return 1;
8916 else if (cpu_unit_reservation_p (state, _1mii_))
8917 return 0;
8918 else if (cpu_unit_reservation_p (state, _1mmb_))
8919 return 7;
8920 else if (cpu_unit_reservation_p (state, _1mib_))
8921 return 6;
8922 else if (cpu_unit_reservation_p (state, _1mbb_))
8923 return 5;
8924 else if (cpu_unit_reservation_p (state, _1bbb_))
8925 return 4;
8926 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8927 return 3;
8928 else if (cpu_unit_reservation_p (state, _1mfi_))
8929 return 2;
8930 else if (cpu_unit_reservation_p (state, _1mfb_))
8931 return 8;
8932 else if (cpu_unit_reservation_p (state, _1mlx_))
8933 return 9;
8934 else
8935 gcc_unreachable ();
8936 default:
8937 gcc_unreachable ();
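/* Reading the cpu unit names tested above, the template codes returned
   appear to be: 0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb,
   5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx.  These are the values
   later passed to gen_bundle_selector by ia64_add_bundle_selector_before
   (which special-cases 4 and 5, i.e. the .bbb and .mbb templates).  */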
8941 /* True when INSN is important for bundling. */
8943 static bool
8944 important_for_bundling_p (rtx_insn *insn)
8946 return (INSN_P (insn)
8947 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8948 && GET_CODE (PATTERN (insn)) != USE
8949 && GET_CODE (PATTERN (insn)) != CLOBBER);
8952 /* The following function returns the first insn important for insn
8953 bundling at or after INSN and before TAIL. */
8955 static rtx_insn *
8956 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8958 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8959 if (important_for_bundling_p (insn))
8960 return insn;
8961 return NULL;
8964 /* True when INSN is unknown, but important, for bundling. */
8966 static bool
8967 unknown_for_bundling_p (rtx_insn *insn)
8969 return (INSN_P (insn)
8970 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8971 && GET_CODE (PATTERN (insn)) != USE
8972 && GET_CODE (PATTERN (insn)) != CLOBBER);
8975 /* Add a bundle selector TEMPLATE0 before INSN. */
8977 static void
8978 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8980 rtx b = gen_bundle_selector (GEN_INT (template0));
8982 ia64_emit_insn_before (b, insn);
8983 #if NR_BUNDLES == 10
8984 if ((template0 == 4 || template0 == 5)
8985 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8987 int i;
8988 rtx note = NULL_RTX;
8990 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8991 first or second slot.  If it is and has a REG_EH_REGION note, copy the
8992 note to the following nops, as br.call sets rp to the address of the
8993 following bundle and therefore an EH region end must be on a bundle
8994 boundary. */
8995 insn = PREV_INSN (insn);
8996 for (i = 0; i < 3; i++)
8999 insn = next_active_insn (insn);
9000 while (NONJUMP_INSN_P (insn)
9001 && get_attr_empty (insn) == EMPTY_YES);
9002 if (CALL_P (insn))
9003 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9004 else if (note)
9006 int code;
9008 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9009 || code == CODE_FOR_nop_b);
9010 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9011 note = NULL_RTX;
9012 else
9013 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9017 #endif
9020 /* The following function does insn bundling. Bundling means
9021 inserting templates and nop insns to fit insn groups into permitted
9022 templates. Instruction scheduling uses NDFA (non-deterministic
9023 finite automata) encoding information about the templates and the
9024 inserted nops.  Nondeterminism of the automata permits following
9025 all possible insn sequences very quickly.
9027 Unfortunately it is not possible to get information about inserted
9028 nop insns and used templates from the automaton states.  The
9029 automaton only says that we can issue an insn possibly inserting
9030 some nops before it and using some template.  Therefore insn
9031 bundling in this function is implemented by using a DFA
9032 (deterministic finite automaton).  We follow all possible insn
9033 sequences by inserting 0-2 nops (that is what the NDFA describes for
9034 insn scheduling) before/after each insn being bundled.  We know the
9035 start of a simulated processor cycle from insn scheduling (insn
9036 starting a new cycle has TImode).
9038 A simple implementation of insn bundling would create an enormous
9039 number of possible insn sequences satisfying information about new
9040 cycle ticks taken from the insn scheduling. To make the algorithm
9041 practical we use dynamic programming. Each decision (about
9042 inserting nops and implicitly about previous decisions) is described
9043 by structure bundle_state (see above). If we generate the same
9044 bundle state (key is automaton state after issuing the insns and
9045 nops for it), we reuse the already generated one.  As a consequence we
9046 reject some decisions which cannot improve the solution and
9047 reduce memory for the algorithm.
9049 When we reach the end of the EBB (extended basic block), we choose the
9050 best sequence and then, moving back through the EBB, insert templates for
9051 the best alternative.  The templates are taken by querying the
9052 automaton state for each insn in the chosen bundle states.
9054 So the algorithm makes two (forward and backward) passes through
9055 the EBB. */
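/* A condensed pseudo-code sketch of the forward pass described above
   (illustrative only; the real loop in bundling () also handles group
   barriers, asm insns and cycle boundaries):

       index_to_bundle_states[0] = initial empty state;
       for each important insn I (1-based index i)
	 for each state S in index_to_bundle_states[i - 1]
	   try issue_nops_and_insn (S, 0/1/2 nops, I, ...);
	   // insert_bundle_state () keeps only the best state per
	   // (DFA state, insn number) key.

   The backward pass then follows best_state->originator links, emitting
   the recorded nops and bundle selector templates.  */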
9057 static void
9058 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9060 struct bundle_state *curr_state, *next_state, *best_state;
9061 rtx_insn *insn, *next_insn;
9062 int insn_num;
9063 int i, bundle_end_p, only_bundle_end_p, asm_p;
9064 int pos = 0, max_pos, template0, template1;
9065 rtx_insn *b;
9066 enum attr_type type;
9068 insn_num = 0;
9069 /* Count insns in the EBB. */
9070 for (insn = NEXT_INSN (prev_head_insn);
9071 insn && insn != tail;
9072 insn = NEXT_INSN (insn))
9073 if (INSN_P (insn))
9074 insn_num++;
9075 if (insn_num == 0)
9076 return;
9077 bundling_p = 1;
9078 dfa_clean_insn_cache ();
9079 initiate_bundle_state_table ();
9080 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9081 /* First (forward) pass -- generation of bundle states. */
9082 curr_state = get_free_bundle_state ();
9083 curr_state->insn = NULL;
9084 curr_state->before_nops_num = 0;
9085 curr_state->after_nops_num = 0;
9086 curr_state->insn_num = 0;
9087 curr_state->cost = 0;
9088 curr_state->accumulated_insns_num = 0;
9089 curr_state->branch_deviation = 0;
9090 curr_state->middle_bundle_stops = 0;
9091 curr_state->next = NULL;
9092 curr_state->originator = NULL;
9093 state_reset (curr_state->dfa_state);
9094 index_to_bundle_states [0] = curr_state;
9095 insn_num = 0;
9096 /* Shift cycle mark if it is put on insn which could be ignored. */
9097 for (insn = NEXT_INSN (prev_head_insn);
9098 insn != tail;
9099 insn = NEXT_INSN (insn))
9100 if (INSN_P (insn)
9101 && !important_for_bundling_p (insn)
9102 && GET_MODE (insn) == TImode)
9104 PUT_MODE (insn, VOIDmode);
9105 for (next_insn = NEXT_INSN (insn);
9106 next_insn != tail;
9107 next_insn = NEXT_INSN (next_insn))
9108 if (important_for_bundling_p (next_insn)
9109 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9111 PUT_MODE (next_insn, TImode);
9112 break;
9115 /* Forward pass: generation of bundle states. */
9116 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9117 insn != NULL_RTX;
9118 insn = next_insn)
9120 gcc_assert (important_for_bundling_p (insn));
9121 type = ia64_safe_type (insn);
9122 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9123 insn_num++;
9124 index_to_bundle_states [insn_num] = NULL;
9125 for (curr_state = index_to_bundle_states [insn_num - 1];
9126 curr_state != NULL;
9127 curr_state = next_state)
9129 pos = curr_state->accumulated_insns_num % 3;
9130 next_state = curr_state->next;
9131 /* We must fill up the current bundle in order to start a
9132 subsequent asm insn in a new bundle.  An asm insn is always
9133 placed in a separate bundle. */
9134 only_bundle_end_p
9135 = (next_insn != NULL_RTX
9136 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9137 && unknown_for_bundling_p (next_insn));
9138 /* We may fill up the current bundle if it is the cycle end
9139 without a group barrier. */
9140 bundle_end_p
9141 = (only_bundle_end_p || next_insn == NULL_RTX
9142 || (GET_MODE (next_insn) == TImode
9143 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9144 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9145 || type == TYPE_S)
9146 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9147 only_bundle_end_p);
9148 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9149 only_bundle_end_p);
9150 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9151 only_bundle_end_p);
9153 gcc_assert (index_to_bundle_states [insn_num]);
9154 for (curr_state = index_to_bundle_states [insn_num];
9155 curr_state != NULL;
9156 curr_state = curr_state->next)
9157 if (verbose >= 2 && dump)
9159 /* This structure is taken from generated code of the
9160 pipeline hazard recognizer (see file insn-attrtab.c).
9161 Please don't forget to change the structure if a new
9162 automaton is added to .md file. */
9163 struct DFA_chip
9165 unsigned short one_automaton_state;
9166 unsigned short oneb_automaton_state;
9167 unsigned short two_automaton_state;
9168 unsigned short twob_automaton_state;
9171 fprintf
9172 (dump,
9173 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9174 curr_state->unique_num,
9175 (curr_state->originator == NULL
9176 ? -1 : curr_state->originator->unique_num),
9177 curr_state->cost,
9178 curr_state->before_nops_num, curr_state->after_nops_num,
9179 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9180 curr_state->middle_bundle_stops,
9181 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9182 INSN_UID (insn));
9186 /* We should find a solution because the 2nd insn scheduling has
9187 found one. */
9188 gcc_assert (index_to_bundle_states [insn_num]);
9189 /* Find a state corresponding to the best insn sequence. */
9190 best_state = NULL;
9191 for (curr_state = index_to_bundle_states [insn_num];
9192 curr_state != NULL;
9193 curr_state = curr_state->next)
9194 /* We are just looking at the states with a fully filled up last
9195 bundle.  First we prefer insn sequences with minimal cost, then
9196 those with minimal inserted nops, and finally those with branch insns
9197 placed in the 3rd slots. */
9198 if (curr_state->accumulated_insns_num % 3 == 0
9199 && (best_state == NULL || best_state->cost > curr_state->cost
9200 || (best_state->cost == curr_state->cost
9201 && (curr_state->accumulated_insns_num
9202 < best_state->accumulated_insns_num
9203 || (curr_state->accumulated_insns_num
9204 == best_state->accumulated_insns_num
9205 && (curr_state->branch_deviation
9206 < best_state->branch_deviation
9207 || (curr_state->branch_deviation
9208 == best_state->branch_deviation
9209 && curr_state->middle_bundle_stops
9210 < best_state->middle_bundle_stops)))))))
9211 best_state = curr_state;
9212 /* Second (backward) pass: adding nops and templates. */
9213 gcc_assert (best_state);
9214 insn_num = best_state->before_nops_num;
9215 template0 = template1 = -1;
9216 for (curr_state = best_state;
9217 curr_state->originator != NULL;
9218 curr_state = curr_state->originator)
9220 insn = curr_state->insn;
9221 asm_p = unknown_for_bundling_p (insn);
9222 insn_num++;
9223 if (verbose >= 2 && dump)
9225 struct DFA_chip
9227 unsigned short one_automaton_state;
9228 unsigned short oneb_automaton_state;
9229 unsigned short two_automaton_state;
9230 unsigned short twob_automaton_state;
9233 fprintf
9234 (dump,
9235 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9236 curr_state->unique_num,
9237 (curr_state->originator == NULL
9238 ? -1 : curr_state->originator->unique_num),
9239 curr_state->cost,
9240 curr_state->before_nops_num, curr_state->after_nops_num,
9241 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9242 curr_state->middle_bundle_stops,
9243 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9244 INSN_UID (insn));
9246 /* Find the position in the current bundle window. The window can
9247 contain at most two bundles.  A two-bundle window means that
9248 the processor will make two bundle rotations. */
9249 max_pos = get_max_pos (curr_state->dfa_state);
9250 if (max_pos == 6
9251 /* The following (negative template number) means that the
9252 processor did one bundle rotation. */
9253 || (max_pos == 3 && template0 < 0))
9255 /* We are at the end of the window -- find template(s) for
9256 its bundle(s). */
9257 pos = max_pos;
9258 if (max_pos == 3)
9259 template0 = get_template (curr_state->dfa_state, 3);
9260 else
9262 template1 = get_template (curr_state->dfa_state, 3);
9263 template0 = get_template (curr_state->dfa_state, 6);
9266 if (max_pos > 3 && template1 < 0)
9267 /* It may happen when we have the stop inside a bundle. */
9269 gcc_assert (pos <= 3);
9270 template1 = get_template (curr_state->dfa_state, 3);
9271 pos += 3;
9273 if (!asm_p)
9274 /* Emit nops after the current insn. */
9275 for (i = 0; i < curr_state->after_nops_num; i++)
9277 rtx nop_pat = gen_nop ();
9278 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9279 pos--;
9280 gcc_assert (pos >= 0);
9281 if (pos % 3 == 0)
9283 /* We are at the start of a bundle: emit the template
9284 (it should be defined). */
9285 gcc_assert (template0 >= 0);
9286 ia64_add_bundle_selector_before (template0, nop);
9287 /* If we have a two-bundle window, we make one bundle
9288 rotation. Otherwise template0 will be undefined
9289 (negative value). */
9290 template0 = template1;
9291 template1 = -1;
9294 /* Move the position backward in the window. Group barrier has
9295 no slot.  An asm insn takes a whole bundle. */
9296 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9297 && !unknown_for_bundling_p (insn))
9298 pos--;
9299 /* Long insn takes 2 slots. */
9300 if (ia64_safe_type (insn) == TYPE_L)
9301 pos--;
9302 gcc_assert (pos >= 0);
9303 if (pos % 3 == 0
9304 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9305 && !unknown_for_bundling_p (insn))
9307 /* The current insn is at the bundle start: emit the
9308 template. */
9309 gcc_assert (template0 >= 0);
9310 ia64_add_bundle_selector_before (template0, insn);
9311 b = PREV_INSN (insn);
9312 insn = b;
9313 /* See comment above in analogous place for emitting nops
9314 after the insn. */
9315 template0 = template1;
9316 template1 = -1;
9318 /* Emit nops before the current insn. */
9319 for (i = 0; i < curr_state->before_nops_num; i++)
9321 rtx nop_pat = gen_nop ();
9322 ia64_emit_insn_before (nop_pat, insn);
9323 rtx_insn *nop = PREV_INSN (insn);
9324 insn = nop;
9325 pos--;
9326 gcc_assert (pos >= 0);
9327 if (pos % 3 == 0)
9329 /* See comment above in analogous place for emitting nops
9330 after the insn. */
9331 gcc_assert (template0 >= 0);
9332 ia64_add_bundle_selector_before (template0, insn);
9333 b = PREV_INSN (insn);
9334 insn = b;
9335 template0 = template1;
9336 template1 = -1;
9341 #ifdef ENABLE_CHECKING
9343 /* Assert right calculation of middle_bundle_stops. */
9344 int num = best_state->middle_bundle_stops;
9345 bool start_bundle = true, end_bundle = false;
9347 for (insn = NEXT_INSN (prev_head_insn);
9348 insn && insn != tail;
9349 insn = NEXT_INSN (insn))
9351 if (!INSN_P (insn))
9352 continue;
9353 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9354 start_bundle = true;
9355 else
9357 rtx_insn *next_insn;
9359 for (next_insn = NEXT_INSN (insn);
9360 next_insn && next_insn != tail;
9361 next_insn = NEXT_INSN (next_insn))
9362 if (INSN_P (next_insn)
9363 && (ia64_safe_itanium_class (next_insn)
9364 != ITANIUM_CLASS_IGNORE
9365 || recog_memoized (next_insn)
9366 == CODE_FOR_bundle_selector)
9367 && GET_CODE (PATTERN (next_insn)) != USE
9368 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9369 break;
9371 end_bundle = next_insn == NULL_RTX
9372 || next_insn == tail
9373 || (INSN_P (next_insn)
9374 && recog_memoized (next_insn)
9375 == CODE_FOR_bundle_selector);
9376 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9377 && !start_bundle && !end_bundle
9378 && next_insn
9379 && !unknown_for_bundling_p (next_insn))
9380 num--;
9382 start_bundle = false;
9386 gcc_assert (num == 0);
9388 #endif
9390 free (index_to_bundle_states);
9391 finish_bundle_state_table ();
9392 bundling_p = 0;
9393 dfa_clean_insn_cache ();
9396 /* The following function is called at the end of scheduling BB or
9397 EBB. After reload, it inserts stop bits and does insn bundling. */
9399 static void
9400 ia64_sched_finish (FILE *dump, int sched_verbose)
9402 if (sched_verbose)
9403 fprintf (dump, "// Finishing schedule.\n");
9404 if (!reload_completed)
9405 return;
9406 if (reload_completed)
9408 final_emit_insn_group_barriers (dump);
9409 bundling (dump, sched_verbose, current_sched_info->prev_head,
9410 current_sched_info->next_tail);
9411 if (sched_verbose && dump)
9412 fprintf (dump, "// finishing %d-%d\n",
9413 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9414 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9416 return;
9420 /* The following function inserts stop bits in scheduled BB or EBB. */
9422 static void
9423 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9425 rtx_insn *insn;
9426 int need_barrier_p = 0;
9427 int seen_good_insn = 0;
9429 init_insn_group_barriers ();
9431 for (insn = NEXT_INSN (current_sched_info->prev_head);
9432 insn != current_sched_info->next_tail;
9433 insn = NEXT_INSN (insn))
9435 if (BARRIER_P (insn))
9437 rtx_insn *last = prev_active_insn (insn);
9439 if (! last)
9440 continue;
9441 if (JUMP_TABLE_DATA_P (last))
9442 last = prev_active_insn (last);
9443 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9444 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9446 init_insn_group_barriers ();
9447 seen_good_insn = 0;
9448 need_barrier_p = 0;
9450 else if (NONDEBUG_INSN_P (insn))
9452 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9454 init_insn_group_barriers ();
9455 seen_good_insn = 0;
9456 need_barrier_p = 0;
9458 else if (need_barrier_p || group_barrier_needed (insn)
9459 || (mflag_sched_stop_bits_after_every_cycle
9460 && GET_MODE (insn) == TImode
9461 && seen_good_insn))
9463 if (TARGET_EARLY_STOP_BITS)
9465 rtx_insn *last;
9467 for (last = insn;
9468 last != current_sched_info->prev_head;
9469 last = PREV_INSN (last))
9470 if (INSN_P (last) && GET_MODE (last) == TImode
9471 && stops_p [INSN_UID (last)])
9472 break;
9473 if (last == current_sched_info->prev_head)
9474 last = insn;
9475 last = prev_active_insn (last);
9476 if (last
9477 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9478 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9479 last);
9480 init_insn_group_barriers ();
9481 for (last = NEXT_INSN (last);
9482 last != insn;
9483 last = NEXT_INSN (last))
9484 if (INSN_P (last))
9486 group_barrier_needed (last);
9487 if (recog_memoized (last) >= 0
9488 && important_for_bundling_p (last))
9489 seen_good_insn = 1;
9492 else
9494 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9495 insn);
9496 init_insn_group_barriers ();
9497 seen_good_insn = 0;
9499 group_barrier_needed (insn);
9500 if (recog_memoized (insn) >= 0
9501 && important_for_bundling_p (insn))
9502 seen_good_insn = 1;
9504 else if (recog_memoized (insn) >= 0
9505 && important_for_bundling_p (insn))
9506 seen_good_insn = 1;
9507 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
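/* For reference: gen_insn_group_barrier (GEN_INT (3)), as emitted above,
   is the pseudo insn for an architectural stop bit (the insn_group_barrier
   pattern in ia64.md, which prints as ";;" in the assembly output); e.g. a
   barrier placed after a call terminates the call's instruction group.  */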
9514 /* The following function returns the lookahead depth used by the DFA
9515 insn scheduler during first-cycle multipass scheduling. */
9517 static int
9518 ia64_first_cycle_multipass_dfa_lookahead (void)
9520 return (reload_completed ? 6 : 4);
9523 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9525 static void
9526 ia64_init_dfa_pre_cycle_insn (void)
9528 if (temp_dfa_state == NULL)
9530 dfa_state_size = state_size ();
9531 temp_dfa_state = xmalloc (dfa_state_size);
9532 prev_cycle_state = xmalloc (dfa_state_size);
9534 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9535 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9536 recog_memoized (dfa_pre_cycle_insn);
9537 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9538 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9539 recog_memoized (dfa_stop_insn);
9542 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9543 used by the DFA insn scheduler. */
9545 static rtx
9546 ia64_dfa_pre_cycle_insn (void)
9548 return dfa_pre_cycle_insn;
9551 /* The following function returns TRUE if PRODUCER (of type ilog or
9552 ld) produces an address for CONSUMER (of type st or stf). */
9555 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9557 rtx dest, reg, mem;
9559 gcc_assert (producer && consumer);
9560 dest = ia64_single_set (producer);
9561 gcc_assert (dest);
9562 reg = SET_DEST (dest);
9563 gcc_assert (reg);
9564 if (GET_CODE (reg) == SUBREG)
9565 reg = SUBREG_REG (reg);
9566 gcc_assert (GET_CODE (reg) == REG);
9568 dest = ia64_single_set (consumer);
9569 gcc_assert (dest);
9570 mem = SET_DEST (dest);
9571 gcc_assert (mem && GET_CODE (mem) == MEM);
9572 return reg_mentioned_p (reg, mem);
9575 /* The following function returns TRUE if PRODUCER (of type ilog or
9576 ld) produces an address for CONSUMER (of type ld or fld). */
9579 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9581 rtx dest, src, reg, mem;
9583 gcc_assert (producer && consumer);
9584 dest = ia64_single_set (producer);
9585 gcc_assert (dest);
9586 reg = SET_DEST (dest);
9587 gcc_assert (reg);
9588 if (GET_CODE (reg) == SUBREG)
9589 reg = SUBREG_REG (reg);
9590 gcc_assert (GET_CODE (reg) == REG);
9592 src = ia64_single_set (consumer);
9593 gcc_assert (src);
9594 mem = SET_SRC (src);
9595 gcc_assert (mem);
9597 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9598 mem = XVECEXP (mem, 0, 0);
9599 else if (GET_CODE (mem) == IF_THEN_ELSE)
9600 /* ??? Is this bypass necessary for ld.c? */
9602 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9603 mem = XEXP (mem, 1);
9606 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9607 mem = XEXP (mem, 0);
9609 if (GET_CODE (mem) == UNSPEC)
9611 int c = XINT (mem, 1);
9613 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9614 || c == UNSPEC_LDSA);
9615 mem = XVECEXP (mem, 0, 0);
9618 /* Note that LO_SUM is used for GOT loads. */
9619 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9621 return reg_mentioned_p (reg, mem);
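/* Illustrative example: for a speculative or advanced load the memory
   reference sits inside an UNSPEC in the RTL, e.g.

       add r14 = r32, r33      // producer
       ld8.s r20 = [r14]       // consumer; SET_SRC is (unspec [(mem ...)] UNSPEC_LDS)

   which is why the code above strips UNSPEC, IF_THEN_ELSE and ZERO_EXTEND
   wrappers before checking whether the produced register is mentioned in
   the address.  */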
9624 /* The following function returns TRUE if INSN produces an address for a
9625 load/store insn.  We place such insns into the M slot because that
9626 decreases their latency. */
9628 int
9629 ia64_produce_address_p (rtx insn)
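  /* The scheduler reuses the insn's CALL bit as a scratch flag here:
     ia64_dependencies_evaluation_hook sets it on IALU insns whose result
     feeds the address operand of a later load or store.  */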
9631 return insn->call;
9635 /* Emit pseudo-ops for the assembler to describe predicate relations.
9636 At present this assumes that we only consider predicate pairs to
9637 be mutex, and that the assembler can deduce proper values from
9638 straight-line code. */
9640 static void
9641 emit_predicate_relation_info (void)
9643 basic_block bb;
9645 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9647 int r;
9648 rtx_insn *head = BB_HEAD (bb);
9650 /* We only need such notes at code labels. */
9651 if (! LABEL_P (head))
9652 continue;
9653 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9654 head = NEXT_INSN (head);
9656 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9657 grabbing the entire block of predicate registers. */
9658 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9659 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9661 rtx p = gen_rtx_REG (BImode, r);
9662 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9663 if (head == BB_END (bb))
9664 BB_END (bb) = n;
9665 head = n;
9669 /* Look for conditional calls that do not return, and protect predicate
9670 relations around them. Otherwise the assembler will assume the call
9671 returns, and complain about uses of call-clobbered predicates after
9672 the call. */
9673 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9675 rtx_insn *insn = BB_HEAD (bb);
9677 while (1)
9679 if (CALL_P (insn)
9680 && GET_CODE (PATTERN (insn)) == COND_EXEC
9681 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9683 rtx_insn *b =
9684 emit_insn_before (gen_safe_across_calls_all (), insn);
9685 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9686 if (BB_HEAD (bb) == insn)
9687 BB_HEAD (bb) = b;
9688 if (BB_END (bb) == insn)
9689 BB_END (bb) = a;
9692 if (insn == BB_END (bb))
9693 break;
9694 insn = NEXT_INSN (insn);
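/* Illustrative effect: for a code label whose block has, say, p6/p7 live
   on entry, the first loop above emits a pred_rel_mutex note that the
   assembler sees as a predicate-mutex annotation, and the second loop
   brackets no-return conditional calls with the safe_across_calls
   directives so the assembler does not complain about uses of
   call-clobbered predicates after a call that never returns.  */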
9699 /* Perform machine dependent operations on the rtl chain INSNS. */
9701 static void
9702 ia64_reorg (void)
9704 /* We are freeing block_for_insn in the toplev to keep compatibility
9705 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9706 compute_bb_for_insn ();
9708 /* If optimizing, we'll have split before scheduling. */
9709 if (optimize == 0)
9710 split_all_insns ();
9712 if (optimize && flag_schedule_insns_after_reload
9713 && dbg_cnt (ia64_sched2))
9715 basic_block bb;
9716 timevar_push (TV_SCHED2);
9717 ia64_final_schedule = 1;
9719 /* We can't let modulo-sched prevent us from scheduling any bbs,
9720 since we need the final schedule to produce bundle information. */
9721 FOR_EACH_BB_FN (bb, cfun)
9722 bb->flags &= ~BB_DISABLE_SCHEDULE;
9724 initiate_bundle_states ();
9725 ia64_nop = make_insn_raw (gen_nop ());
9726 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9727 recog_memoized (ia64_nop);
9728 clocks_length = get_max_uid () + 1;
9729 stops_p = XCNEWVEC (char, clocks_length);
9731 if (ia64_tune == PROCESSOR_ITANIUM2)
9733 pos_1 = get_cpu_unit_code ("2_1");
9734 pos_2 = get_cpu_unit_code ("2_2");
9735 pos_3 = get_cpu_unit_code ("2_3");
9736 pos_4 = get_cpu_unit_code ("2_4");
9737 pos_5 = get_cpu_unit_code ("2_5");
9738 pos_6 = get_cpu_unit_code ("2_6");
9739 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9740 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9741 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9742 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9743 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9744 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9745 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9746 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9747 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9748 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9749 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9750 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9751 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9752 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9753 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9754 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9755 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9756 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9757 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9758 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9760 else
9762 pos_1 = get_cpu_unit_code ("1_1");
9763 pos_2 = get_cpu_unit_code ("1_2");
9764 pos_3 = get_cpu_unit_code ("1_3");
9765 pos_4 = get_cpu_unit_code ("1_4");
9766 pos_5 = get_cpu_unit_code ("1_5");
9767 pos_6 = get_cpu_unit_code ("1_6");
9768 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9769 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9770 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9771 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9772 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9773 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9774 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9775 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9776 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9777 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9778 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9779 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9780 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9781 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9782 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9783 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9784 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9785 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9786 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9787 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9790 if (flag_selective_scheduling2
9791 && !maybe_skip_selective_scheduling ())
9792 run_selective_scheduling ();
9793 else
9794 schedule_ebbs ();
9796 /* Redo alignment computation, as it might have gone wrong. */
9797 compute_alignments ();
9799 /* We cannot reuse this one because it has been corrupted by the
9800 evil glat. */
9801 finish_bundle_states ();
9802 free (stops_p);
9803 stops_p = NULL;
9804 emit_insn_group_barriers (dump_file);
9806 ia64_final_schedule = 0;
9807 timevar_pop (TV_SCHED2);
9809 else
9810 emit_all_insn_group_barriers (dump_file);
9812 df_analyze ();
9814 /* A call must not be the last instruction in a function, so that the
9815 return address is still within the function, so that unwinding works
9816 properly. Note that IA-64 differs from dwarf2 on this point. */
9817 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9819 rtx_insn *insn;
9820 int saw_stop = 0;
9822 insn = get_last_insn ();
9823 if (! INSN_P (insn))
9824 insn = prev_active_insn (insn);
9825 if (insn)
9827 /* Skip over insns that expand to nothing. */
9828 while (NONJUMP_INSN_P (insn)
9829 && get_attr_empty (insn) == EMPTY_YES)
9831 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9832 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9833 saw_stop = 1;
9834 insn = prev_active_insn (insn);
9836 if (CALL_P (insn))
9838 if (! saw_stop)
9839 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9840 emit_insn (gen_break_f ());
9841 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
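/* Illustrative effect: a function whose last real insn is a br.call gets
   a stop bit (unless one was already present), a break_f filler insn and
   another stop bit appended, so the return address still points at a
   bundle inside the function.  */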
9846 emit_predicate_relation_info ();
9848 if (flag_var_tracking)
9850 timevar_push (TV_VAR_TRACKING);
9851 variable_tracking_main ();
9852 timevar_pop (TV_VAR_TRACKING);
9854 df_finish_pass (false);
9857 /* Return true if REGNO is used by the epilogue. */
9859 int
9860 ia64_epilogue_uses (int regno)
9862 switch (regno)
9864 case R_GR (1):
9865 /* With a call to a function in another module, we will write a new
9866 value to "gp". After returning from such a call, we need to make
9867 sure the function restores the original gp-value, even if the
9868 function itself does not use the gp anymore. */
9869 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9871 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9872 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9873 /* For functions defined with the syscall_linkage attribute, all
9874 input registers are marked as live at all function exits. This
9875 prevents the register allocator from using the input registers,
9876 which in turn makes it possible to restart a system call after
9877 an interrupt without having to save/restore the input registers.
9878 This also prevents kernel data from leaking to application code. */
9879 return lookup_attribute ("syscall_linkage",
9880 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9882 case R_BR (0):
9883 /* Conditional return patterns can't represent the use of `b0' as
9884 the return address, so we force the value live this way. */
9885 return 1;
9887 case AR_PFS_REGNUM:
9888 /* Likewise for ar.pfs, which is used by br.ret. */
9889 return 1;
9891 default:
9892 return 0;
9896 /* Return true if REGNO is used by the frame unwinder. */
9898 int
9899 ia64_eh_uses (int regno)
9901 unsigned int r;
9903 if (! reload_completed)
9904 return 0;
9906 if (regno == 0)
9907 return 0;
9909 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9910 if (regno == current_frame_info.r[r]
9911 || regno == emitted_frame_related_regs[r])
9912 return 1;
9914 return 0;
9917 /* Return true if this goes in small data/bss. */
9919 /* ??? We could also support our own long data here.  Generating movl/add/ld8
9920 instead of addl,ld8/ld8 makes the code bigger, but should make the
9921 code faster because there is one less load.  This also includes incomplete
9922 types which can't go in sdata/sbss. */
9924 static bool
9925 ia64_in_small_data_p (const_tree exp)
9927 if (TARGET_NO_SDATA)
9928 return false;
9930 /* We want to merge strings, so we never consider them small data. */
9931 if (TREE_CODE (exp) == STRING_CST)
9932 return false;
9934 /* Functions are never small data. */
9935 if (TREE_CODE (exp) == FUNCTION_DECL)
9936 return false;
9938 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9940 const char *section = DECL_SECTION_NAME (exp);
9942 if (strcmp (section, ".sdata") == 0
9943 || strncmp (section, ".sdata.", 7) == 0
9944 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9945 || strcmp (section, ".sbss") == 0
9946 || strncmp (section, ".sbss.", 6) == 0
9947 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9948 return true;
9950 else
9952 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9954 /* If this is an incomplete type with size 0, then we can't put it
9955 in sdata because it might be too big when completed. */
9956 if (size > 0 && size <= ia64_section_threshold)
9957 return true;
9960 return false;
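/* Illustrative example: a global such as "static int counter;" whose size
   is at or below ia64_section_threshold ends up in .sbss/.sdata and can
   then be reached gp-relative with a short addl/ld8 sequence instead of
   a movl of the full address.  */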
9963 /* Output assembly directives for prologue regions. */
9965 /* The current basic block number. */
9967 static bool last_block;
9969 /* True if we need a copy_state command at the start of the next block. */
9971 static bool need_copy_state;
9973 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9974 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9975 #endif
9977 /* The function emits unwind directives for the start of an epilogue. */
9979 static void
9980 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9981 bool unwind, bool frame ATTRIBUTE_UNUSED)
9983 /* If this isn't the last block of the function, then we need to label the
9984 current state, and copy it back in at the start of the next block. */
9986 if (!last_block)
9988 if (unwind)
9989 fprintf (asm_out_file, "\t.label_state %d\n",
9990 ++cfun->machine->state_num);
9991 need_copy_state = true;
9994 if (unwind)
9995 fprintf (asm_out_file, "\t.restore sp\n");
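/* Illustrative output: for an epilogue in the middle of a function this
   emits something like

       .label_state 1
       .restore sp

   and ia64_asm_unwind_emit later emits the matching ".copy_state 1"
   (preceded by ".body") at the start of the next basic block, driven by
   need_copy_state.  */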
9998 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
10000 static void
10001 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10002 bool unwind, bool frame)
10004 rtx dest = SET_DEST (pat);
10005 rtx src = SET_SRC (pat);
10007 if (dest == stack_pointer_rtx)
10009 if (GET_CODE (src) == PLUS)
10011 rtx op0 = XEXP (src, 0);
10012 rtx op1 = XEXP (src, 1);
10014 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10016 if (INTVAL (op1) < 0)
10018 gcc_assert (!frame_pointer_needed);
10019 if (unwind)
10020 fprintf (asm_out_file,
10021 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
10022 -INTVAL (op1));
10024 else
10025 process_epilogue (asm_out_file, insn, unwind, frame);
10027 else
10029 gcc_assert (src == hard_frame_pointer_rtx);
10030 process_epilogue (asm_out_file, insn, unwind, frame);
10033 else if (dest == hard_frame_pointer_rtx)
10035 gcc_assert (src == stack_pointer_rtx);
10036 gcc_assert (frame_pointer_needed);
10038 if (unwind)
10039 fprintf (asm_out_file, "\t.vframe r%d\n",
10040 ia64_dbx_register_number (REGNO (dest)));
10042 else
10043 gcc_unreachable ();
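/* Illustrative mapping: a prologue adjustment
       (set (reg sp) (plus (reg sp) (const_int -64)))
   becomes ".fframe 64"; restoring sp from the hard frame pointer or
   adding a positive constant is treated as the start of an epilogue,
   and establishing the frame pointer emits ".vframe rNN".  */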
10046 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10048 static void
10049 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10051 rtx dest = SET_DEST (pat);
10052 rtx src = SET_SRC (pat);
10053 int dest_regno = REGNO (dest);
10054 int src_regno;
10056 if (src == pc_rtx)
10058 /* Saving return address pointer. */
10059 if (unwind)
10060 fprintf (asm_out_file, "\t.save rp, r%d\n",
10061 ia64_dbx_register_number (dest_regno));
10062 return;
10065 src_regno = REGNO (src);
10067 switch (src_regno)
10069 case PR_REG (0):
10070 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10071 if (unwind)
10072 fprintf (asm_out_file, "\t.save pr, r%d\n",
10073 ia64_dbx_register_number (dest_regno));
10074 break;
10076 case AR_UNAT_REGNUM:
10077 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10078 if (unwind)
10079 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10080 ia64_dbx_register_number (dest_regno));
10081 break;
10083 case AR_LC_REGNUM:
10084 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10085 if (unwind)
10086 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10087 ia64_dbx_register_number (dest_regno));
10088 break;
10090 default:
10091 /* Everything else should indicate being stored to memory. */
10092 gcc_unreachable ();
10096 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10098 static void
10099 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10101 rtx dest = SET_DEST (pat);
10102 rtx src = SET_SRC (pat);
10103 int src_regno = REGNO (src);
10104 const char *saveop;
10105 HOST_WIDE_INT off;
10106 rtx base;
10108 gcc_assert (MEM_P (dest));
10109 if (GET_CODE (XEXP (dest, 0)) == REG)
10111 base = XEXP (dest, 0);
10112 off = 0;
10114 else
10116 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10117 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10118 base = XEXP (XEXP (dest, 0), 0);
10119 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10122 if (base == hard_frame_pointer_rtx)
10124 saveop = ".savepsp";
10125 off = - off;
10127 else
10129 gcc_assert (base == stack_pointer_rtx);
10130 saveop = ".savesp";
10133 src_regno = REGNO (src);
10134 switch (src_regno)
10136 case BR_REG (0):
10137 gcc_assert (!current_frame_info.r[reg_save_b0]);
10138 if (unwind)
10139 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10140 saveop, off);
10141 break;
10143 case PR_REG (0):
10144 gcc_assert (!current_frame_info.r[reg_save_pr]);
10145 if (unwind)
10146 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10147 saveop, off);
10148 break;
10150 case AR_LC_REGNUM:
10151 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10152 if (unwind)
10153 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10154 saveop, off);
10155 break;
10157 case AR_PFS_REGNUM:
10158 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10159 if (unwind)
10160 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10161 saveop, off);
10162 break;
10164 case AR_UNAT_REGNUM:
10165 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10166 if (unwind)
10167 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10168 saveop, off);
10169 break;
10171 case GR_REG (4):
10172 case GR_REG (5):
10173 case GR_REG (6):
10174 case GR_REG (7):
10175 if (unwind)
10176 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10177 1 << (src_regno - GR_REG (4)));
10178 break;
10180 case BR_REG (1):
10181 case BR_REG (2):
10182 case BR_REG (3):
10183 case BR_REG (4):
10184 case BR_REG (5):
10185 if (unwind)
10186 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10187 1 << (src_regno - BR_REG (1)));
10188 break;
10190 case FR_REG (2):
10191 case FR_REG (3):
10192 case FR_REG (4):
10193 case FR_REG (5):
10194 if (unwind)
10195 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10196 1 << (src_regno - FR_REG (2)));
10197 break;
10199 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10200 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10201 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10202 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10203 if (unwind)
10204 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10205 1 << (src_regno - FR_REG (12)));
10206 break;
10208 default:
10209 /* ??? For some reason we mark other general registers, even those
10210 we can't represent in the unwind info. Ignore them. */
10211 break;
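/* Illustrative mapping: spilling b0 at sp+16 in the prologue is described
   as ".savesp rp, 16"; the same save addressed through the hard frame
   pointer would use ".savepsp" with the negated offset.  */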
10215 /* This function looks at a single insn and emits any directives
10216 required to unwind this insn. */
10218 static void
10219 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10221 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10222 bool frame = dwarf2out_do_frame ();
10223 rtx note, pat;
10224 bool handled_one;
10226 if (!unwind && !frame)
10227 return;
10229 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10231 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10232 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10234 /* Restore unwind state from immediately before the epilogue. */
10235 if (need_copy_state)
10237 if (unwind)
10239 fprintf (asm_out_file, "\t.body\n");
10240 fprintf (asm_out_file, "\t.copy_state %d\n",
10241 cfun->machine->state_num);
10243 need_copy_state = false;
10247 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10248 return;
10250 /* Look for the ALLOC insn. */
10251 if (INSN_CODE (insn) == CODE_FOR_alloc)
10253 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10254 int dest_regno = REGNO (dest);
10256 /* If this is the final destination for ar.pfs, then this must
10257 be the alloc in the prologue. */
10258 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10260 if (unwind)
10261 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10262 ia64_dbx_register_number (dest_regno));
10264 else
10266 /* This must be an alloc before a sibcall. We must drop the
10267 old frame info. The easiest way to drop the old frame
10268 info is to ensure we had a ".restore sp" directive
10269 followed by a new prologue. If the procedure doesn't
10270 have a memory-stack frame, we'll issue a dummy ".restore
10271 sp" now. */
10272 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10273 /* If we haven't done process_epilogue () yet, do it now.  */
10274 process_epilogue (asm_out_file, insn, unwind, frame);
10275 if (unwind)
10276 fprintf (asm_out_file, "\t.prologue\n");
10278 return;
10281 handled_one = false;
10282 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10283 switch (REG_NOTE_KIND (note))
10285 case REG_CFA_ADJUST_CFA:
10286 pat = XEXP (note, 0);
10287 if (pat == NULL)
10288 pat = PATTERN (insn);
10289 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10290 handled_one = true;
10291 break;
10293 case REG_CFA_OFFSET:
10294 pat = XEXP (note, 0);
10295 if (pat == NULL)
10296 pat = PATTERN (insn);
10297 process_cfa_offset (asm_out_file, pat, unwind);
10298 handled_one = true;
10299 break;
10301 case REG_CFA_REGISTER:
10302 pat = XEXP (note, 0);
10303 if (pat == NULL)
10304 pat = PATTERN (insn);
10305 process_cfa_register (asm_out_file, pat, unwind);
10306 handled_one = true;
10307 break;
10309 case REG_FRAME_RELATED_EXPR:
10310 case REG_CFA_DEF_CFA:
10311 case REG_CFA_EXPRESSION:
10312 case REG_CFA_RESTORE:
10313 case REG_CFA_SET_VDRAP:
10314 /* Not used in the ia64 port. */
10315 gcc_unreachable ();
10317 default:
10318 /* Not a frame-related note. */
10319 break;
10322 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10323 explicit action to take. No guessing required. */
10324 gcc_assert (handled_one);
10327 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10329 static void
10330 ia64_asm_emit_except_personality (rtx personality)
10332 fputs ("\t.personality\t", asm_out_file);
10333 output_addr_const (asm_out_file, personality);
10334 fputc ('\n', asm_out_file);
10337 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10339 static void
10340 ia64_asm_init_sections (void)
10342 exception_section = get_unnamed_section (0, output_section_asm_op,
10343 "\t.handlerdata");
10346 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10348 static enum unwind_info_type
10349 ia64_debug_unwind_info (void)
10351 return UI_TARGET;
10354 enum ia64_builtins
10356 IA64_BUILTIN_BSP,
10357 IA64_BUILTIN_COPYSIGNQ,
10358 IA64_BUILTIN_FABSQ,
10359 IA64_BUILTIN_FLUSHRS,
10360 IA64_BUILTIN_INFQ,
10361 IA64_BUILTIN_HUGE_VALQ,
10362 IA64_BUILTIN_max
10365 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10367 void
10368 ia64_init_builtins (void)
10370 tree fpreg_type;
10371 tree float80_type;
10372 tree decl;
10374 /* The __fpreg type. */
10375 fpreg_type = make_node (REAL_TYPE);
10376 TYPE_PRECISION (fpreg_type) = 82;
10377 layout_type (fpreg_type);
10378 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10380 /* The __float80 type. */
10381 float80_type = make_node (REAL_TYPE);
10382 TYPE_PRECISION (float80_type) = 80;
10383 layout_type (float80_type);
10384 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10386 /* The __float128 type. */
10387 if (!TARGET_HPUX)
10389 tree ftype;
10390 tree float128_type = make_node (REAL_TYPE);
10392 TYPE_PRECISION (float128_type) = 128;
10393 layout_type (float128_type);
10394 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10396 /* TFmode support builtins. */
10397 ftype = build_function_type_list (float128_type, NULL_TREE);
10398 decl = add_builtin_function ("__builtin_infq", ftype,
10399 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10400 NULL, NULL_TREE);
10401 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10403 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10404 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10405 NULL, NULL_TREE);
10406 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10408 ftype = build_function_type_list (float128_type,
10409 float128_type,
10410 NULL_TREE);
10411 decl = add_builtin_function ("__builtin_fabsq", ftype,
10412 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10413 "__fabstf2", NULL_TREE);
10414 TREE_READONLY (decl) = 1;
10415 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10417 ftype = build_function_type_list (float128_type,
10418 float128_type,
10419 float128_type,
10420 NULL_TREE);
10421 decl = add_builtin_function ("__builtin_copysignq", ftype,
10422 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10423 "__copysigntf3", NULL_TREE);
10424 TREE_READONLY (decl) = 1;
10425 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10427 else
10428 /* Under HPUX, this is a synonym for "long double". */
10429 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10430 "__float128");
10432 /* Fwrite on VMS is non-standard. */
10433 #if TARGET_ABI_OPEN_VMS
10434 vms_patch_builtins ();
10435 #endif
10437 #define def_builtin(name, type, code) \
10438 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10439 NULL, NULL_TREE)
10441 decl = def_builtin ("__builtin_ia64_bsp",
10442 build_function_type_list (ptr_type_node, NULL_TREE),
10443 IA64_BUILTIN_BSP);
10444 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10446 decl = def_builtin ("__builtin_ia64_flushrs",
10447 build_function_type_list (void_type_node, NULL_TREE),
10448 IA64_BUILTIN_FLUSHRS);
10449 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10451 #undef def_builtin
10453 if (TARGET_HPUX)
10455 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10456 set_user_assembler_name (decl, "_Isfinite");
10457 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10458 set_user_assembler_name (decl, "_Isfinitef");
10459 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10460 set_user_assembler_name (decl, "_Isfinitef128");
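/* Usage sketch (illustrative): the machine-specific builtins declared
   above are directly usable from C, e.g.

       void *bsp = __builtin_ia64_bsp ();   // current RSE backing store pointer
       __builtin_ia64_flushrs ();           // flush the register stack to memory
       __float128 huge = __builtin_huge_valq ();   // not on HP-UX, where
                                                   // __float128 is long double
*/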
10464 rtx
10465 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10466 machine_mode mode ATTRIBUTE_UNUSED,
10467 int ignore ATTRIBUTE_UNUSED)
10469 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10470 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10472 switch (fcode)
10474 case IA64_BUILTIN_BSP:
10475 if (! target || ! register_operand (target, DImode))
10476 target = gen_reg_rtx (DImode);
10477 emit_insn (gen_bsp_value (target));
10478 #ifdef POINTERS_EXTEND_UNSIGNED
10479 target = convert_memory_address (ptr_mode, target);
10480 #endif
10481 return target;
10483 case IA64_BUILTIN_FLUSHRS:
10484 emit_insn (gen_flushrs ());
10485 return const0_rtx;
10487 case IA64_BUILTIN_INFQ:
10488 case IA64_BUILTIN_HUGE_VALQ:
10490 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10491 REAL_VALUE_TYPE inf;
10492 rtx tmp;
10494 real_inf (&inf);
10495 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10497 tmp = validize_mem (force_const_mem (target_mode, tmp));
10499 if (target == 0)
10500 target = gen_reg_rtx (target_mode);
10502 emit_move_insn (target, tmp);
10503 return target;
10506 case IA64_BUILTIN_FABSQ:
10507 case IA64_BUILTIN_COPYSIGNQ:
10508 return expand_call (exp, target, ignore);
10510 default:
10511 gcc_unreachable ();
10514 return NULL_RTX;
10517 /* Return the ia64 builtin for CODE. */
10519 static tree
10520 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10522 if (code >= IA64_BUILTIN_max)
10523 return error_mark_node;
10525 return ia64_builtins[code];
10528 /* On HP-UX IA64, aggregate parameters are passed stored in the
10529 most significant bits of the stack slot. */
10531 enum direction
10532 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10534 /* Exception to normal case for structures/unions/etc. */
10536 if (type && AGGREGATE_TYPE_P (type)
10537 && int_size_in_bytes (type) < UNITS_PER_WORD)
10538 return upward;
10540 /* Fall back to the default. */
10541 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
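/* Illustrative example: on HP-UX a 3-byte struct passed by value is
   placed in the most significant bytes of its 8-byte slot (padding
   direction "upward"); scalars and larger aggregates use the default
   padding rule.  */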
10544 /* Emit text to declare externally defined variables and functions, because
10545 the Intel assembler does not support undefined externals. */
10547 void
10548 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10550 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10551 set in order to avoid putting out names that are never really
10552 used. */
10553 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10555 /* maybe_assemble_visibility will return 1 if the assembler
10556 visibility directive is output. */
10557 int need_visibility = ((*targetm.binds_local_p) (decl)
10558 && maybe_assemble_visibility (decl));
10560 /* GNU as does not need anything here, but the HP linker does
10561 need something for external functions. */
10562 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10563 && TREE_CODE (decl) == FUNCTION_DECL)
10564 (*targetm.asm_out.globalize_decl_name) (file, decl);
10565 else if (need_visibility && !TARGET_GNU_AS)
10566 (*targetm.asm_out.globalize_label) (file, name);
10570 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10571 modes of word_mode and larger. Rename the TFmode libfuncs using the
10572 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10573 backward compatibility. */
10575 static void
10576 ia64_init_libfuncs (void)
10578 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10579 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10580 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10581 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10583 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10584 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10585 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10586 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10587 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10589 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10590 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10591 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10592 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10593 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10594 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10596 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10597 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10598 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10599 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10600 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10602 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10603 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10604 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10605 /* HP-UX 11.23 libc does not have a function for unsigned
10606 SImode-to-TFmode conversion. */
10607 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10610 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10612 static void
10613 ia64_hpux_init_libfuncs (void)
10615 ia64_init_libfuncs ();
10617 /* The HP SI millicode division and mod functions expect DI arguments.
10618 By turning them off completely we avoid using both libgcc and the
10619 non-standard millicode routines and use the HP DI millicode routines
10620 instead. */
10622 set_optab_libfunc (sdiv_optab, SImode, 0);
10623 set_optab_libfunc (udiv_optab, SImode, 0);
10624 set_optab_libfunc (smod_optab, SImode, 0);
10625 set_optab_libfunc (umod_optab, SImode, 0);
10627 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10628 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10629 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10630 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10632 /* HP-UX libc has TF min/max/abs routines in it. */
10633 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10634 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10635 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10637 /* ia64_expand_compare uses this. */
10638 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10640 /* These should never be used. */
10641 set_optab_libfunc (eq_optab, TFmode, 0);
10642 set_optab_libfunc (ne_optab, TFmode, 0);
10643 set_optab_libfunc (gt_optab, TFmode, 0);
10644 set_optab_libfunc (ge_optab, TFmode, 0);
10645 set_optab_libfunc (lt_optab, TFmode, 0);
10646 set_optab_libfunc (le_optab, TFmode, 0);
10649 /* Rename the division and modulus functions in VMS. */
10651 static void
10652 ia64_vms_init_libfuncs (void)
10654 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10655 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10656 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10657 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10658 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10659 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10660 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10661 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10662 abort_libfunc = init_one_libfunc ("decc$abort");
10663 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10664 #ifdef MEM_LIBFUNCS_INIT
10665 MEM_LIBFUNCS_INIT;
10666 #endif
10669 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10670 the HPUX conventions. */
10672 static void
10673 ia64_sysv4_init_libfuncs (void)
10675 ia64_init_libfuncs ();
10677 /* These functions are not part of the HPUX TFmode interface. We
10678 use them instead of _U_Qfcmp, which doesn't work the way we
10679 expect. */
10680 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10681 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10682 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10683 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10684 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10685 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10687 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10688 glibc doesn't have them. */
10691 /* Use soft-fp. */
10693 static void
10694 ia64_soft_fp_init_libfuncs (void)
10698 static bool
10699 ia64_vms_valid_pointer_mode (machine_mode mode)
10701 return (mode == SImode || mode == DImode);
10704 /* For HPUX, it is illegal to have relocations in shared segments. */
10706 static int
10707 ia64_hpux_reloc_rw_mask (void)
10709 return 3;
10712 /* For others, relax this so that relocations to local data go in
10713 read-only segments, but we still cannot allow global relocations
10714 in read-only segments. */
10716 static int
10717 ia64_reloc_rw_mask (void)
10719 return flag_pic ? 3 : 2;
10722 /* Return the section to use for X. The only special thing we do here
10723 is to honor small data. */
10725 static section *
10726 ia64_select_rtx_section (machine_mode mode, rtx x,
10727 unsigned HOST_WIDE_INT align)
10729 if (GET_MODE_SIZE (mode) > 0
10730 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10731 && !TARGET_NO_SDATA)
10732 return sdata_section;
10733 else
10734 return default_elf_select_rtx_section (mode, x, align);
10737 static unsigned int
10738 ia64_section_type_flags (tree decl, const char *name, int reloc)
10740 unsigned int flags = 0;
10742 if (strcmp (name, ".sdata") == 0
10743 || strncmp (name, ".sdata.", 7) == 0
10744 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10745 || strncmp (name, ".sdata2.", 8) == 0
10746 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10747 || strcmp (name, ".sbss") == 0
10748 || strncmp (name, ".sbss.", 6) == 0
10749 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10750 flags = SECTION_SMALL;
10752 flags |= default_section_type_flags (decl, name, reloc);
10753 return flags;
10756 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10757 structure type and that the address of that type should be passed
10758 in out0, rather than in r8. */
10760 static bool
10761 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10763 tree ret_type = TREE_TYPE (fntype);
10765 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10766 as the structure return address parameter, if the return value
10767 type has a non-trivial copy constructor or destructor. It is not
10768 clear if this same convention should be used for other
10769 programming languages. Until G++ 3.4, we incorrectly used r8 for
10770 these return values. */
10771 return (abi_version_at_least (2)
10772 && ret_type
10773 && TYPE_MODE (ret_type) == BLKmode
10774 && TREE_ADDRESSABLE (ret_type)
10775 && lang_GNU_CXX ());
10778 /* Output the assembler code for a thunk function. THUNK_DECL is the
10779 declaration for the thunk function itself, FUNCTION is the decl for
10780 the target function. DELTA is an immediate constant offset to be
10781 added to THIS. If VCALL_OFFSET is nonzero, the word at
10782 *(*this + vcall_offset) should be added to THIS. */
10784 static void
10785 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10786 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10787 tree function)
10789 rtx this_rtx, funexp;
10790 rtx_insn *insn;
10791 unsigned int this_parmno;
10792 unsigned int this_regno;
10793 rtx delta_rtx;
10795 reload_completed = 1;
10796 epilogue_completed = 1;
10798 /* Set things up as ia64_expand_prologue might. */
10799 last_scratch_gr_reg = 15;
10801 memset (&current_frame_info, 0, sizeof (current_frame_info));
10802 current_frame_info.spill_cfa_off = -16;
10803 current_frame_info.n_input_regs = 1;
10804 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10806 /* Mark the end of the (empty) prologue. */
10807 emit_note (NOTE_INSN_PROLOGUE_END);
10809 /* Figure out whether "this" will be the first parameter (the
10810 typical case) or the second parameter (as happens when the
10811 virtual function returns certain class objects). */
10812 this_parmno
10813 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10814 ? 1 : 0);
10815 this_regno = IN_REG (this_parmno);
10816 if (!TARGET_REG_NAMES)
10817 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10819 this_rtx = gen_rtx_REG (Pmode, this_regno);
10821 /* Apply the constant offset, if required. */
10822 delta_rtx = GEN_INT (delta);
10823 if (TARGET_ILP32)
10825 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10826 REG_POINTER (tmp) = 1;
10827 if (delta && satisfies_constraint_I (delta_rtx))
10829 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10830 delta = 0;
10832 else
10833 emit_insn (gen_ptr_extend (this_rtx, tmp));
10835 if (delta)
10837 if (!satisfies_constraint_I (delta_rtx))
10839 rtx tmp = gen_rtx_REG (Pmode, 2);
10840 emit_move_insn (tmp, delta_rtx);
10841 delta_rtx = tmp;
10843 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10846 /* Apply the offset from the vtable, if required. */
10847 if (vcall_offset)
10849 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10850 rtx tmp = gen_rtx_REG (Pmode, 2);
10852 if (TARGET_ILP32)
10854 rtx t = gen_rtx_REG (ptr_mode, 2);
10855 REG_POINTER (t) = 1;
10856 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10857 if (satisfies_constraint_I (vcall_offset_rtx))
10859 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10860 vcall_offset = 0;
10862 else
10863 emit_insn (gen_ptr_extend (tmp, t));
10865 else
10866 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10868 if (vcall_offset)
10870 if (!satisfies_constraint_J (vcall_offset_rtx))
10872 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10873 emit_move_insn (tmp2, vcall_offset_rtx);
10874 vcall_offset_rtx = tmp2;
10876 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10879 if (TARGET_ILP32)
10880 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10881 else
10882 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10884 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10887 /* Generate a tail call to the target function. */
10888 if (! TREE_USED (function))
10890 assemble_external (function);
10891 TREE_USED (function) = 1;
10893 funexp = XEXP (DECL_RTL (function), 0);
10894 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10895 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10896 insn = get_last_insn ();
10897 SIBLING_CALL_P (insn) = 1;
10899 /* Code generation for calls relies on splitting. */
10900 reload_completed = 1;
10901 epilogue_completed = 1;
10902 try_split (PATTERN (insn), insn, 0);
10904 emit_barrier ();
10906 /* Run just enough of rest_of_compilation to get the insns emitted.
10907 There's not really enough bulk here to make other passes such as
10908 instruction scheduling worth while. Note that use_thunk calls
10909 assemble_start_function and assemble_end_function. */
10911 emit_all_insn_group_barriers (NULL);
10912 insn = get_insns ();
10913 shorten_branches (insn);
10914 final_start_function (insn, file, 1);
10915 final (insn, file, 1);
10916 final_end_function ();
10918 reload_completed = 0;
10919 epilogue_completed = 0;
10922 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10924 static rtx
10925 ia64_struct_value_rtx (tree fntype,
10926 int incoming ATTRIBUTE_UNUSED)
10928 if (TARGET_ABI_OPEN_VMS ||
10929 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10930 return NULL_RTX;
10931 return gen_rtx_REG (Pmode, GR_REG (8));
10934 static bool
10935 ia64_scalar_mode_supported_p (machine_mode mode)
10937 switch (mode)
10939 case QImode:
10940 case HImode:
10941 case SImode:
10942 case DImode:
10943 case TImode:
10944 return true;
10946 case SFmode:
10947 case DFmode:
10948 case XFmode:
10949 case RFmode:
10950 return true;
10952 case TFmode:
10953 return true;
10955 default:
10956 return false;
10960 static bool
10961 ia64_vector_mode_supported_p (machine_mode mode)
10963 switch (mode)
10965 case V8QImode:
10966 case V4HImode:
10967 case V2SImode:
10968 return true;
10970 case V2SFmode:
10971 return true;
10973 default:
10974 return false;
10978 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P. */
10980 static bool
10981 ia64_libgcc_floating_mode_supported_p (machine_mode mode)
10983 switch (mode)
10985 case SFmode:
10986 case DFmode:
10987 return true;
10989 case XFmode:
10990 #ifdef IA64_NO_LIBGCC_XFMODE
10991 return false;
10992 #else
10993 return true;
10994 #endif
10996 case TFmode:
10997 #ifdef IA64_NO_LIBGCC_TFMODE
10998 return false;
10999 #else
11000 return true;
11001 #endif
11003 default:
11004 return false;
11008 /* Implement the FUNCTION_PROFILER macro. */
11010 void
11011 ia64_output_function_profiler (FILE *file, int labelno)
11013 bool indirect_call;
11015 /* If the function needs a static chain and the static chain
11016 register is r15, we use an indirect call so as to bypass
11017 the PLT stub in case the executable is dynamically linked,
11018 because the stub clobbers r15 as per 5.3.6 of the psABI.
11019 We don't need to do that in non-canonical PIC mode. */
11021 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11023 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11024 indirect_call = true;
11026 else
11027 indirect_call = false;
11029 if (TARGET_GNU_AS)
11030 fputs ("\t.prologue 4, r40\n", file);
11031 else
11032 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11033 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11035 if (NO_PROFILE_COUNTERS)
11036 fputs ("\tmov out3 = r0\n", file);
11037 else
11039 char buf[20];
11040 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11042 if (TARGET_AUTO_PIC)
11043 fputs ("\tmovl out3 = @gprel(", file);
11044 else
11045 fputs ("\taddl out3 = @ltoff(", file);
11046 assemble_name (file, buf);
11047 if (TARGET_AUTO_PIC)
11048 fputs (")\n", file);
11049 else
11050 fputs ("), r1\n", file);
11053 if (indirect_call)
11054 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11055 fputs ("\t;;\n", file);
11057 fputs ("\t.save rp, r42\n", file);
11058 fputs ("\tmov out2 = b0\n", file);
11059 if (indirect_call)
11060 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11061 fputs ("\t.body\n", file);
11062 fputs ("\tmov out1 = r1\n", file);
11063 if (indirect_call)
11065 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11066 fputs ("\tmov b6 = r16\n", file);
11067 fputs ("\tld8 r1 = [r14]\n", file);
11068 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11070 else
11071 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11074 static GTY(()) rtx mcount_func_rtx;
11075 static rtx
11076 gen_mcount_func_rtx (void)
11078 if (!mcount_func_rtx)
11079 mcount_func_rtx = init_one_libfunc ("_mcount");
11080 return mcount_func_rtx;
11083 void
11084 ia64_profile_hook (int labelno)
11086 rtx label, ip;
11088 if (NO_PROFILE_COUNTERS)
11089 label = const0_rtx;
11090 else
11092 char buf[30];
11093 const char *label_name;
11094 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11095 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11096 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11097 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11099 ip = gen_reg_rtx (Pmode);
11100 emit_insn (gen_ip_value (ip));
11101 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11102 VOIDmode, 3,
11103 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11104 ip, Pmode,
11105 label, Pmode);
11108 /* Return the mangling of TYPE if it is an extended fundamental type. */
11110 static const char *
11111 ia64_mangle_type (const_tree type)
11113 type = TYPE_MAIN_VARIANT (type);
11115 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11116 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11117 return NULL;
11119 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11120 mangled as "e". */
11121 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11122 return "g";
11123 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11124 an extended mangling. Elsewhere, "e" is available since long
11125 double is 80 bits. */
11126 if (TYPE_MODE (type) == XFmode)
11127 return TARGET_HPUX ? "u9__float80" : "e";
11128 if (TYPE_MODE (type) == RFmode)
11129 return "u7__fpreg";
11130 return NULL;
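/* Illustrative manglings (Itanium C++ ABI vendor extended types): with
   the rules above, "void f (__fpreg);" mangles to _Z1fu7__fpreg, and
   "void g (__float80);" to _Z1gu9__float80 on HP-UX or _Z1ge elsewhere;
   __float128 mangles as "g" except on HP-UX.  */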
11133 /* Return the diagnostic message string if conversion from FROMTYPE to
11134 TOTYPE is not allowed, NULL otherwise. */
11135 static const char *
11136 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11138 /* Reject nontrivial conversion to or from __fpreg. */
11139 if (TYPE_MODE (fromtype) == RFmode
11140 && TYPE_MODE (totype) != RFmode
11141 && TYPE_MODE (totype) != VOIDmode)
11142 return N_("invalid conversion from %<__fpreg%>");
11143 if (TYPE_MODE (totype) == RFmode
11144 && TYPE_MODE (fromtype) != RFmode)
11145 return N_("invalid conversion to %<__fpreg%>");
11146 return NULL;
11149 /* Return the diagnostic message string if the unary operation OP is
11150 not permitted on TYPE, NULL otherwise. */
11151 static const char *
11152 ia64_invalid_unary_op (int op, const_tree type)
11154 /* Reject operations on __fpreg other than unary + or &. */
11155 if (TYPE_MODE (type) == RFmode
11156 && op != CONVERT_EXPR
11157 && op != ADDR_EXPR)
11158 return N_("invalid operation on %<__fpreg%>");
11159 return NULL;
11162 /* Return the diagnostic message string if the binary operation OP is
11163 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11164 static const char *
11165 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11167 /* Reject operations on __fpreg. */
11168 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11169 return N_("invalid operation on %<__fpreg%>");
11170 return NULL;
11173 /* HP-UX version_id attribute.
11174 For object foo, if the version_id is set to 1234, put out an alias
11175 of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11176 other than an alias statement because it is an illegal symbol name. */
11178 static tree
11179 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11180 tree name ATTRIBUTE_UNUSED,
11181 tree args,
11182 int flags ATTRIBUTE_UNUSED,
11183 bool *no_add_attrs)
11185 tree arg = TREE_VALUE (args);
11187 if (TREE_CODE (arg) != STRING_CST)
11189 error("version attribute is not a string");
11190 *no_add_attrs = true;
11191 return NULL_TREE;
11193 return NULL_TREE;
11196 /* Target hook for c_mode_for_suffix. */
11198 static machine_mode
11199 ia64_c_mode_for_suffix (char suffix)
11201 if (suffix == 'q')
11202 return TFmode;
11203 if (suffix == 'w')
11204 return XFmode;
11206 return VOIDmode;
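/* Illustrative effect: with this hook, a literal such as 1.5q has type
   __float128 (TFmode) and 1.5w has type __float80 (XFmode) when GNU
   constant-suffix extensions are enabled.  */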
11209 static GTY(()) rtx ia64_dconst_0_5_rtx;
11211 rtx
11212 ia64_dconst_0_5 (void)
11214 if (! ia64_dconst_0_5_rtx)
11216 REAL_VALUE_TYPE rv;
11217 real_from_string (&rv, "0.5");
11218 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11220 return ia64_dconst_0_5_rtx;
11223 static GTY(()) rtx ia64_dconst_0_375_rtx;
11225 rtx
11226 ia64_dconst_0_375 (void)
11228 if (! ia64_dconst_0_375_rtx)
11230 REAL_VALUE_TYPE rv;
11231 real_from_string (&rv, "0.375");
11232 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11234 return ia64_dconst_0_375_rtx;
11237 static machine_mode
11238 ia64_get_reg_raw_mode (int regno)
11240 if (FR_REGNO_P (regno))
11241 return XFmode;
11242 return default_get_reg_raw_mode (regno);
11245 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11246 anymore. */
11248 bool
11249 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11251 return TARGET_HPUX && mode == TFmode;
11254 /* Always default to .text section until HP-UX linker is fixed. */
11256 ATTRIBUTE_UNUSED static section *
11257 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11258 enum node_frequency freq ATTRIBUTE_UNUSED,
11259 bool startup ATTRIBUTE_UNUSED,
11260 bool exit ATTRIBUTE_UNUSED)
11262 return NULL;
11265 /* Construct (set target (vec_select op0 (parallel perm))) and
11266 return true if that's a valid instruction in the active ISA. */
11268 static bool
11269 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11271 rtx rperm[MAX_VECT_LEN], x;
11272 unsigned i;
11274 for (i = 0; i < nelt; ++i)
11275 rperm[i] = GEN_INT (perm[i]);
11277 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11278 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11279 x = gen_rtx_SET (VOIDmode, target, x);
11281 rtx_insn *insn = emit_insn (x);
11282 if (recog_memoized (insn) < 0)
11284 remove_insn (insn);
11285 return false;
11287 return true;
11290 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11292 static bool
11293 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11294 const unsigned char *perm, unsigned nelt)
11296 machine_mode v2mode;
11297 rtx x;
11299 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11300 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11301 return expand_vselect (target, x, perm, nelt);
11304 /* Try to expand a no-op permutation. */
11306 static bool
11307 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11309 unsigned i, nelt = d->nelt;
11311 for (i = 0; i < nelt; ++i)
11312 if (d->perm[i] != i)
11313 return false;
11315 if (!d->testing_p)
11316 emit_move_insn (d->target, d->op0);
11318 return true;
11321 /* Try to expand D via a shrp instruction. */
11323 static bool
11324 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11326 unsigned i, nelt = d->nelt, shift, mask;
11327 rtx tmp, hi, lo;
11329 /* ??? Don't force V2SFmode into the integer registers. */
11330 if (d->vmode == V2SFmode)
11331 return false;
11333 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11335 shift = d->perm[0];
11336 if (BYTES_BIG_ENDIAN && shift > nelt)
11337 return false;
11339 for (i = 1; i < nelt; ++i)
11340 if (d->perm[i] != ((shift + i) & mask))
11341 return false;
11343 if (d->testing_p)
11344 return true;
11346 hi = shift < nelt ? d->op1 : d->op0;
11347 lo = shift < nelt ? d->op0 : d->op1;
11349 shift %= nelt;
11351 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11353 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11354 gcc_assert (IN_RANGE (shift, 1, 63));
11356 /* Recall that big-endian elements are numbered starting at the top of
11357 the register. Ideally we'd have a shift-left-pair. But since we
11358 don't, convert to a shift the other direction. */
11359 if (BYTES_BIG_ENDIAN)
11360 shift = 64 - shift;
11362 tmp = gen_reg_rtx (DImode);
11363 hi = gen_lowpart (DImode, hi);
11364 lo = gen_lowpart (DImode, lo);
11365 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11367 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11368 return true;
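/* Illustrative example: the two-operand V8QImode permutation
   { 3, 4, 5, 6, 7, 8, 9, 10 } selects a contiguous byte window across
   the concatenated operands, so it is handled here by a single shrp of
   the two 64-bit register images with the appropriate bit count.  */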
11371 /* Try to instantiate D in a single instruction. */
11373 static bool
11374 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11376 unsigned i, nelt = d->nelt;
11377 unsigned char perm2[MAX_VECT_LEN];
11379 /* Try single-operand selections. */
11380 if (d->one_operand_p)
11382 if (expand_vec_perm_identity (d))
11383 return true;
11384 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11385 return true;
11388 /* Try two operand selections. */
11389 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11390 return true;
11392 /* Recognize interleave style patterns with reversed operands. */
11393 if (!d->one_operand_p)
11395 for (i = 0; i < nelt; ++i)
11397 unsigned e = d->perm[i];
11398 if (e >= nelt)
11399 e -= nelt;
11400 else
11401 e += nelt;
11402 perm2[i] = e;
11405 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11406 return true;
11409 if (expand_vec_perm_shrp (d))
11410 return true;
11412 /* ??? Look for deposit-like permutations where most of the result
11413 comes from one vector unchanged and the rest comes from a
11414 sequential hunk of the other vector. */
11416 return false;
11419 /* Pattern match broadcast permutations. */
11421 static bool
11422 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11424 unsigned i, elt, nelt = d->nelt;
11425 unsigned char perm2[2];
11426 rtx temp;
11427 bool ok;
11429 if (!d->one_operand_p)
11430 return false;
11432 elt = d->perm[0];
11433 for (i = 1; i < nelt; ++i)
11434 if (d->perm[i] != elt)
11435 return false;
11437 switch (d->vmode)
11439 case V2SImode:
11440 case V2SFmode:
11441 /* Implementable by interleave. */
11442 perm2[0] = elt;
11443 perm2[1] = elt + 2;
11444 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11445 gcc_assert (ok);
11446 break;
11448 case V8QImode:
11449 /* Implementable by extract + broadcast. */
11450 if (BYTES_BIG_ENDIAN)
11451 elt = 7 - elt;
11452 elt *= BITS_PER_UNIT;
11453 temp = gen_reg_rtx (DImode);
11454 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11455 GEN_INT (8), GEN_INT (elt)));
11456 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11457 break;
11459 case V4HImode:
11460 /* Should have been matched directly by vec_select. */
11461 default:
11462 gcc_unreachable ();
11465 return true;
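/* Illustrative example: broadcasting element 2 of a V8QImode vector is
   done by extracting that byte with extzv and replicating it via the
   mux1 broadcast pattern, while a V2SI/V2SF broadcast is expressed as an
   interleave of the operand with itself.  */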
11468 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11469 two vector permutation into a single vector permutation by using
11470 an interleave operation to merge the vectors. */
11472 static bool
11473 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11475 struct expand_vec_perm_d dremap, dfinal;
11476 unsigned char remap[2 * MAX_VECT_LEN];
11477 unsigned contents, i, nelt, nelt2;
11478 unsigned h0, h1, h2, h3;
11479 rtx_insn *seq;
11480 bool ok;
11482 if (d->one_operand_p)
11483 return false;
11485 nelt = d->nelt;
11486 nelt2 = nelt / 2;
11488 /* Examine from whence the elements come. */
11489 contents = 0;
11490 for (i = 0; i < nelt; ++i)
11491 contents |= 1u << d->perm[i];
11493 memset (remap, 0xff, sizeof (remap));
11494 dremap = *d;
11496 h0 = (1u << nelt2) - 1;
11497 h1 = h0 << nelt2;
11498 h2 = h0 << nelt;
11499 h3 = h0 << (nelt + nelt2);
11501 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11503 for (i = 0; i < nelt; ++i)
11505 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11506 remap[which] = i;
11507 dremap.perm[i] = which;
11510 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11512 for (i = 0; i < nelt; ++i)
11514 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11515 remap[which] = i;
11516 dremap.perm[i] = which;
11519 else if ((contents & 0x5555) == contents) /* mix even elements */
11521 for (i = 0; i < nelt; ++i)
11523 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11524 remap[which] = i;
11525 dremap.perm[i] = which;
11528 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11530 for (i = 0; i < nelt; ++i)
11532 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11533 remap[which] = i;
11534 dremap.perm[i] = which;
11537 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11539 unsigned shift = ctz_hwi (contents);
11540 for (i = 0; i < nelt; ++i)
11542 unsigned which = (i + shift) & (2 * nelt - 1);
11543 remap[which] = i;
11544 dremap.perm[i] = which;
11547 else
11548 return false;
11550 /* Use the remapping array set up above to move the elements from their
11551 swizzled locations into their final destinations. */
11552 dfinal = *d;
11553 for (i = 0; i < nelt; ++i)
11555 unsigned e = remap[d->perm[i]];
11556 gcc_assert (e < nelt);
11557 dfinal.perm[i] = e;
11559 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11560 dfinal.op1 = dfinal.op0;
11561 dfinal.one_operand_p = true;
11562 dremap.target = dfinal.op0;
11564 /* Test if the final remap can be done with a single insn. For V4HImode
11565 this *will* succeed. For V8QImode or V2SImode it may not. */
11566 start_sequence ();
11567 ok = expand_vec_perm_1 (&dfinal);
11568 seq = get_insns ();
11569 end_sequence ();
11570 if (!ok)
11571 return false;
11572 if (d->testing_p)
11573 return true;
11575 ok = expand_vec_perm_1 (&dremap);
11576 gcc_assert (ok);
11578 emit_insn (seq);
11579 return true;
11582 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11583 constant permutation via two mux2 and a merge. */
11585 static bool
11586 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11588 unsigned char perm2[4];
11589 rtx rmask[4];
11590 unsigned i;
11591 rtx t0, t1, mask, x;
11592 bool ok;
11594 if (d->vmode != V4HImode || d->one_operand_p)
11595 return false;
11596 if (d->testing_p)
11597 return true;
11599 for (i = 0; i < 4; ++i)
11601 perm2[i] = d->perm[i] & 3;
11602 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11604 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11605 mask = force_reg (V4HImode, mask);
11607 t0 = gen_reg_rtx (V4HImode);
11608 t1 = gen_reg_rtx (V4HImode);
11610 ok = expand_vselect (t0, d->op0, perm2, 4);
11611 gcc_assert (ok);
11612 ok = expand_vselect (t1, d->op1, perm2, 4);
11613 gcc_assert (ok);
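/* Merge the two shuffled vectors: (t0 & mask) | (t1 & ~mask).  */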
11615 x = gen_rtx_AND (V4HImode, mask, t0);
11616 emit_insn (gen_rtx_SET (VOIDmode, t0, x));
11618 x = gen_rtx_NOT (V4HImode, mask);
11619 x = gen_rtx_AND (V4HImode, x, t1);
11620 emit_insn (gen_rtx_SET (VOIDmode, t1, x));
11622 x = gen_rtx_IOR (V4HImode, t0, t1);
11623 emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
11625 return true;
11626 }
11628 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11629 With all of the interface bits taken care of, perform the expansion
11630 in D and return true on success. */
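/* The strategies below appear to be tried roughly in order of increasing
   cost: a single instruction, a broadcast, an interleave-based two-step
   expansion, and finally the V4HImode mux2-and-merge fallback.  */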
11632 static bool
11633 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11634 {
11635 if (expand_vec_perm_1 (d))
11636 return true;
11637 if (expand_vec_perm_broadcast (d))
11638 return true;
11639 if (expand_vec_perm_interleave_2 (d))
11640 return true;
11641 if (expand_vec_perm_v4hi_5 (d))
11642 return true;
11643 return false;
11644 }
11646 bool
11647 ia64_expand_vec_perm_const (rtx operands[4])
11648 {
11649 struct expand_vec_perm_d d;
11650 unsigned char perm[MAX_VECT_LEN];
11651 int i, nelt, which;
11652 rtx sel;
11654 d.target = operands[0];
11655 d.op0 = operands[1];
11656 d.op1 = operands[2];
11657 sel = operands[3];
11659 d.vmode = GET_MODE (d.target);
11660 gcc_assert (VECTOR_MODE_P (d.vmode));
11661 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11662 d.testing_p = false;
11664 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11665 gcc_assert (XVECLEN (sel, 0) == nelt);
11666 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11668 for (i = which = 0; i < nelt; ++i)
11669 {
11670 rtx e = XVECEXP (sel, 0, i);
11671 int ei = INTVAL (e) & (2 * nelt - 1);
11673 which |= (ei < nelt ? 1 : 2);
11674 d.perm[i] = ei;
11675 perm[i] = ei;
11676 }
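/* WHICH is now a two-bit mask: bit 0 is set if any element is taken
   from operand 0, bit 1 if any element is taken from operand 1.  */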
11678 switch (which)
11679 {
11680 default:
11681 gcc_unreachable();
11683 case 3:
11684 if (!rtx_equal_p (d.op0, d.op1))
11685 {
11686 d.one_operand_p = false;
11687 break;
11688 }
11690 /* The elements of PERM do not suggest that only the first operand
11691 is used, but both operands are identical. Allow easier matching
11692 of the permutation by folding the permutation into the single
11693 input vector. */
11694 for (i = 0; i < nelt; ++i)
11695 if (d.perm[i] >= nelt)
11696 d.perm[i] -= nelt;
11697 /* FALLTHRU */
11699 case 1:
11700 d.op1 = d.op0;
11701 d.one_operand_p = true;
11702 break;
11704 case 2:
11705 for (i = 0; i < nelt; ++i)
11706 d.perm[i] -= nelt;
11707 d.op0 = d.op1;
11708 d.one_operand_p = true;
11709 break;
11710 }
11712 if (ia64_expand_vec_perm_const_1 (&d))
11713 return true;
11715 /* If the mask says both arguments are needed, but they are the same,
11716 the above tried to expand with one_operand_p true. If that didn't
11717 work, retry with one_operand_p false, as that's what we used in _ok. */
11718 if (which == 3 && d.one_operand_p)
11719 {
11720 memcpy (d.perm, perm, sizeof (perm));
11721 d.one_operand_p = false;
11722 return ia64_expand_vec_perm_const_1 (&d);
11723 }
11725 return false;
11726 }
11728 /* Implement targetm.vectorize.vec_perm_const_ok. */
11730 static bool
11731 ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
11732 const unsigned char *sel)
11733 {
11734 struct expand_vec_perm_d d;
11735 unsigned int i, nelt, which;
11736 bool ret;
11738 d.vmode = vmode;
11739 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11740 d.testing_p = true;
11742 /* Extract the values from the vector CST into the permutation
11743 array in D. */
11744 memcpy (d.perm, sel, nelt);
11745 for (i = which = 0; i < nelt; ++i)
11746 {
11747 unsigned char e = d.perm[i];
11748 gcc_assert (e < 2 * nelt);
11749 which |= (e < nelt ? 1 : 2);
11750 }
11752 /* For all elements from second vector, fold the elements to first. */
11753 if (which == 2)
11754 for (i = 0; i < nelt; ++i)
11755 d.perm[i] -= nelt;
11757 /* Check whether the mask can be applied to the vector type. */
11758 d.one_operand_p = (which != 3);
11760 /* Otherwise we have to go through the motions and see if we can
11761 figure out how to generate the requested permutation. */
11762 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11763 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11764 if (!d.one_operand_p)
11765 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
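/* The raw registers are only placeholders; any RTL generated while
   testing is collected into a sequence below and then discarded.  */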
11767 start_sequence ();
11768 ret = ia64_expand_vec_perm_const_1 (&d);
11769 end_sequence ();
11771 return ret;
11772 }
11774 void
11775 ia64_expand_vec_setv2sf (rtx operands[3])
11776 {
11777 struct expand_vec_perm_d d;
11778 unsigned int which;
11779 bool ok;
11781 d.target = operands[0];
11782 d.op0 = operands[0];
11783 d.op1 = gen_reg_rtx (V2SFmode);
11784 d.vmode = V2SFmode;
11785 d.nelt = 2;
11786 d.one_operand_p = false;
11787 d.testing_p = false;
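/* Strategy: pack the new scalar value operands[1] together with 0.0
   into a fresh V2SF register (D.OP1) via fpack, then use a two-operand
   constant permutation to combine it with the original vector,
   replacing just the selected element.  */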
11789 which = INTVAL (operands[2]);
11790 gcc_assert (which <= 1);
11791 d.perm[0] = 1 - which;
11792 d.perm[1] = which + 2;
11794 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11796 ok = ia64_expand_vec_perm_const_1 (&d);
11797 gcc_assert (ok);
11798 }
11800 void
11801 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11802 {
11803 struct expand_vec_perm_d d;
11804 machine_mode vmode = GET_MODE (target);
11805 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11806 bool ok;
11808 d.target = target;
11809 d.op0 = op0;
11810 d.op1 = op1;
11811 d.vmode = vmode;
11812 d.nelt = nelt;
11813 d.one_operand_p = false;
11814 d.testing_p = false;
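/* Select every second element of the concatenation of OP0 and OP1,
   starting at index ODD (0 for the even-numbered elements, 1 for the
   odd-numbered ones).  */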
11816 for (i = 0; i < nelt; ++i)
11817 d.perm[i] = i * 2 + odd;
11819 ok = ia64_expand_vec_perm_const_1 (&d);
11820 gcc_assert (ok);
11821 }
11823 #include "gt-ia64.h"