gcc/config/ia64/ia64.c (official-gcc.git, as of 2016-11-10)
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2016 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "flags.h"
47 #include "explow.h"
48 #include "expr.h"
49 #include "cfgrtl.h"
50 #include "libfuncs.h"
51 #include "sched-int.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
54 #include "gimplify.h"
55 #include "intl.h"
56 #include "debug.h"
57 #include "params.h"
58 #include "dbgcnt.h"
59 #include "tm-constrs.h"
60 #include "sel-sched.h"
61 #include "reload.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "builtins.h"
66 /* This file should be included last. */
67 #include "target-def.h"
69 /* This is used for communication between ASM_OUTPUT_LABEL and
70 ASM_OUTPUT_LABELREF. */
71 int ia64_asm_output_label = 0;
73 /* Register names for ia64_expand_prologue. */
74 static const char * const ia64_reg_numbers[96] =
75 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
76 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
77 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
78 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
79 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
80 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
81 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
82 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
83 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
84 "r104","r105","r106","r107","r108","r109","r110","r111",
85 "r112","r113","r114","r115","r116","r117","r118","r119",
86 "r120","r121","r122","r123","r124","r125","r126","r127"};
88 /* ??? These strings could be shared with REGISTER_NAMES. */
89 static const char * const ia64_input_reg_names[8] =
90 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
92 /* ??? These strings could be shared with REGISTER_NAMES. */
93 static const char * const ia64_local_reg_names[80] =
94 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
95 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
96 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
97 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
98 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
99 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
100 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
101 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
102 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
103 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
105 /* ??? These strings could be shared with REGISTER_NAMES. */
106 static const char * const ia64_output_reg_names[8] =
107 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
109 /* Variables which are this size or smaller are put in the sdata/sbss
110 sections. */
112 unsigned int ia64_section_threshold;
114 /* The following variable is used by the DFA insn scheduler. The value is
115 TRUE if we do insn bundling instead of insn scheduling. */
116 int bundling_p = 0;
118 enum ia64_frame_regs
119 {
120 reg_fp,
121 reg_save_b0,
122 reg_save_pr,
123 reg_save_ar_pfs,
124 reg_save_ar_unat,
125 reg_save_ar_lc,
126 reg_save_gp,
127 number_of_ia64_frame_regs
128 };
130 /* Structure to be filled in by ia64_compute_frame_size with register
131 save masks and offsets for the current function. */
133 struct ia64_frame_info
134 {
135 HOST_WIDE_INT total_size; /* size of the stack frame, not including
136 the caller's scratch area. */
137 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
138 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
139 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
140 HARD_REG_SET mask; /* mask of saved registers. */
141 unsigned int gr_used_mask; /* mask of registers in use as gr spill
142 registers or long-term scratches. */
143 int n_spilled; /* number of spilled registers. */
144 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
145 int n_input_regs; /* number of input registers used. */
146 int n_local_regs; /* number of local registers used. */
147 int n_output_regs; /* number of output registers used. */
148 int n_rotate_regs; /* number of rotating registers used. */
150 char need_regstk; /* true if a .regstk directive needed. */
151 char initialized; /* true if the data is finalized. */
152 };
154 /* Current frame information calculated by ia64_compute_frame_size. */
155 static struct ia64_frame_info current_frame_info;
156 /* The actual registers that are emitted. */
157 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
159 static int ia64_first_cycle_multipass_dfa_lookahead (void);
160 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
161 static void ia64_init_dfa_pre_cycle_insn (void);
162 static rtx ia64_dfa_pre_cycle_insn (void);
163 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
164 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
165 static void ia64_h_i_d_extended (void);
166 static void * ia64_alloc_sched_context (void);
167 static void ia64_init_sched_context (void *, bool);
168 static void ia64_set_sched_context (void *);
169 static void ia64_clear_sched_context (void *);
170 static void ia64_free_sched_context (void *);
171 static int ia64_mode_to_int (machine_mode);
172 static void ia64_set_sched_flags (spec_info_t);
173 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
174 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
175 static bool ia64_skip_rtx_p (const_rtx);
176 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
177 static bool ia64_needs_block_p (ds_t);
178 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
179 static int ia64_spec_check_p (rtx);
180 static int ia64_spec_check_src_p (rtx);
181 static rtx gen_tls_get_addr (void);
182 static rtx gen_thread_pointer (void);
183 static int find_gr_spill (enum ia64_frame_regs, int);
184 static int next_scratch_gr_reg (void);
185 static void mark_reg_gr_used_mask (rtx, void *);
186 static void ia64_compute_frame_size (HOST_WIDE_INT);
187 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
188 static void finish_spill_pointers (void);
189 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
190 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
191 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
192 static rtx gen_movdi_x (rtx, rtx, rtx);
193 static rtx gen_fr_spill_x (rtx, rtx, rtx);
194 static rtx gen_fr_restore_x (rtx, rtx, rtx);
196 static void ia64_option_override (void);
197 static bool ia64_can_eliminate (const int, const int);
198 static machine_mode hfa_element_mode (const_tree, bool);
199 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
200 tree, int *, int);
201 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
202 tree, bool);
203 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
204 const_tree, bool, bool);
205 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
206 const_tree, bool);
207 static rtx ia64_function_incoming_arg (cumulative_args_t,
208 machine_mode, const_tree, bool);
209 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
210 const_tree, bool);
211 static unsigned int ia64_function_arg_boundary (machine_mode,
212 const_tree);
213 static bool ia64_function_ok_for_sibcall (tree, tree);
214 static bool ia64_return_in_memory (const_tree, const_tree);
215 static rtx ia64_function_value (const_tree, const_tree, bool);
216 static rtx ia64_libcall_value (machine_mode, const_rtx);
217 static bool ia64_function_value_regno_p (const unsigned int);
218 static int ia64_register_move_cost (machine_mode, reg_class_t,
219 reg_class_t);
220 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
221 bool);
222 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
223 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
224 static void fix_range (const char *);
225 static struct machine_function * ia64_init_machine_status (void);
226 static void emit_insn_group_barriers (FILE *);
227 static void emit_all_insn_group_barriers (FILE *);
228 static void final_emit_insn_group_barriers (FILE *);
229 static void emit_predicate_relation_info (void);
230 static void ia64_reorg (void);
231 static bool ia64_in_small_data_p (const_tree);
232 static void process_epilogue (FILE *, rtx, bool, bool);
234 static bool ia64_assemble_integer (rtx, unsigned int, int);
235 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
236 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
237 static void ia64_output_function_end_prologue (FILE *);
239 static void ia64_print_operand (FILE *, rtx, int);
240 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
241 static bool ia64_print_operand_punct_valid_p (unsigned char code);
243 static int ia64_issue_rate (void);
244 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
245 static void ia64_sched_init (FILE *, int, int);
246 static void ia64_sched_init_global (FILE *, int, int);
247 static void ia64_sched_finish_global (FILE *, int);
248 static void ia64_sched_finish (FILE *, int);
249 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
250 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
251 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
252 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
254 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
255 static void ia64_asm_emit_except_personality (rtx);
256 static void ia64_asm_init_sections (void);
258 static enum unwind_info_type ia64_debug_unwind_info (void);
260 static struct bundle_state *get_free_bundle_state (void);
261 static void free_bundle_state (struct bundle_state *);
262 static void initiate_bundle_states (void);
263 static void finish_bundle_states (void);
264 static int insert_bundle_state (struct bundle_state *);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state *, int);
268 static int try_issue_insn (struct bundle_state *, rtx);
269 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
270 int, int);
271 static int get_max_pos (state_t);
272 static int get_template (state_t, int);
274 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
275 static bool important_for_bundling_p (rtx_insn *);
276 static bool unknown_for_bundling_p (rtx_insn *);
277 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
279 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
280 HOST_WIDE_INT, tree);
281 static void ia64_file_start (void);
282 static void ia64_globalize_decl_name (FILE *, tree);
284 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
285 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
286 static section *ia64_select_rtx_section (machine_mode, rtx,
287 unsigned HOST_WIDE_INT);
288 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
289 ATTRIBUTE_UNUSED;
290 static unsigned int ia64_section_type_flags (tree, const char *, int);
291 static void ia64_init_libfuncs (void)
292 ATTRIBUTE_UNUSED;
293 static void ia64_hpux_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
295 static void ia64_sysv4_init_libfuncs (void)
296 ATTRIBUTE_UNUSED;
297 static void ia64_vms_init_libfuncs (void)
298 ATTRIBUTE_UNUSED;
299 static void ia64_soft_fp_init_libfuncs (void)
300 ATTRIBUTE_UNUSED;
301 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
302 ATTRIBUTE_UNUSED;
303 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
304 ATTRIBUTE_UNUSED;
306 static bool ia64_attribute_takes_identifier_p (const_tree);
307 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
308 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
309 static void ia64_encode_section_info (tree, rtx, int);
310 static rtx ia64_struct_value_rtx (tree, int);
311 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
312 static bool ia64_scalar_mode_supported_p (machine_mode mode);
313 static bool ia64_vector_mode_supported_p (machine_mode mode);
314 static bool ia64_legitimate_constant_p (machine_mode, rtx);
315 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
316 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
317 static const char *ia64_mangle_type (const_tree);
318 static const char *ia64_invalid_conversion (const_tree, const_tree);
319 static const char *ia64_invalid_unary_op (int, const_tree);
320 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
321 static machine_mode ia64_c_mode_for_suffix (char);
322 static void ia64_trampoline_init (rtx, tree, rtx);
323 static void ia64_override_options_after_change (void);
324 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
326 static tree ia64_fold_builtin (tree, int, tree *, bool);
327 static tree ia64_builtin_decl (unsigned, bool);
329 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
330 static machine_mode ia64_get_reg_raw_mode (int regno);
331 static section * ia64_hpux_function_section (tree, enum node_frequency,
332 bool, bool);
334 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
335 const unsigned char *sel);
337 #define MAX_VECT_LEN 8
339 struct expand_vec_perm_d
340 {
341 rtx target, op0, op1;
342 unsigned char perm[MAX_VECT_LEN];
343 machine_mode vmode;
344 unsigned char nelt;
345 bool one_operand_p;
346 bool testing_p;
347 };
349 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
352 /* Table of valid machine attributes. */
353 static const struct attribute_spec ia64_attribute_table[] =
354 {
355 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
356 affects_type_identity } */
357 { "syscall_linkage", 0, 0, false, true, true, NULL, false },
358 { "model", 1, 1, true, false, false, ia64_handle_model_attribute,
359 false },
360 #if TARGET_ABI_OPEN_VMS
361 { "common_object", 1, 1, true, false, false,
362 ia64_vms_common_object_attribute, false },
363 #endif
364 { "version_id", 1, 1, true, false, false,
365 ia64_handle_version_id_attribute, false },
366 { NULL, 0, 0, false, false, false, NULL, false }
367 };
369 /* Initialize the GCC target structure. */
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS ia64_init_builtins
376 #undef TARGET_FOLD_BUILTIN
377 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
379 #undef TARGET_EXPAND_BUILTIN
380 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
382 #undef TARGET_BUILTIN_DECL
383 #define TARGET_BUILTIN_DECL ia64_builtin_decl
385 #undef TARGET_ASM_BYTE_OP
386 #define TARGET_ASM_BYTE_OP "\tdata1\t"
387 #undef TARGET_ASM_ALIGNED_HI_OP
388 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
389 #undef TARGET_ASM_ALIGNED_SI_OP
390 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
391 #undef TARGET_ASM_ALIGNED_DI_OP
392 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
393 #undef TARGET_ASM_UNALIGNED_HI_OP
394 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
395 #undef TARGET_ASM_UNALIGNED_SI_OP
396 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
397 #undef TARGET_ASM_UNALIGNED_DI_OP
398 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
399 #undef TARGET_ASM_INTEGER
400 #define TARGET_ASM_INTEGER ia64_assemble_integer
402 #undef TARGET_OPTION_OVERRIDE
403 #define TARGET_OPTION_OVERRIDE ia64_option_override
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
407 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
408 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND ia64_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
419 #undef TARGET_IN_SMALL_DATA_P
420 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
422 #undef TARGET_SCHED_ADJUST_COST
423 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
424 #undef TARGET_SCHED_ISSUE_RATE
425 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
426 #undef TARGET_SCHED_VARIABLE_ISSUE
427 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
428 #undef TARGET_SCHED_INIT
429 #define TARGET_SCHED_INIT ia64_sched_init
430 #undef TARGET_SCHED_FINISH
431 #define TARGET_SCHED_FINISH ia64_sched_finish
432 #undef TARGET_SCHED_INIT_GLOBAL
433 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
434 #undef TARGET_SCHED_FINISH_GLOBAL
435 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
436 #undef TARGET_SCHED_REORDER
437 #define TARGET_SCHED_REORDER ia64_sched_reorder
438 #undef TARGET_SCHED_REORDER2
439 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
441 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
442 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
447 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
448 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
449 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
450 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
452 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
453 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
454 ia64_first_cycle_multipass_dfa_lookahead_guard
456 #undef TARGET_SCHED_DFA_NEW_CYCLE
457 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
459 #undef TARGET_SCHED_H_I_D_EXTENDED
460 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
462 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
463 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
465 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
466 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
468 #undef TARGET_SCHED_SET_SCHED_CONTEXT
469 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
471 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
472 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
474 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
475 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
477 #undef TARGET_SCHED_SET_SCHED_FLAGS
478 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
480 #undef TARGET_SCHED_GET_INSN_SPEC_DS
481 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
483 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
484 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
486 #undef TARGET_SCHED_SPECULATE_INSN
487 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
489 #undef TARGET_SCHED_NEEDS_BLOCK_P
490 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
492 #undef TARGET_SCHED_GEN_SPEC_CHECK
493 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
495 #undef TARGET_SCHED_SKIP_RTX_P
496 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
498 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
499 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
500 #undef TARGET_ARG_PARTIAL_BYTES
501 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
502 #undef TARGET_FUNCTION_ARG
503 #define TARGET_FUNCTION_ARG ia64_function_arg
504 #undef TARGET_FUNCTION_INCOMING_ARG
505 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
506 #undef TARGET_FUNCTION_ARG_ADVANCE
507 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
508 #undef TARGET_FUNCTION_ARG_BOUNDARY
509 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
511 #undef TARGET_ASM_OUTPUT_MI_THUNK
512 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
516 #undef TARGET_ASM_FILE_START
517 #define TARGET_ASM_FILE_START ia64_file_start
519 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
520 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
522 #undef TARGET_REGISTER_MOVE_COST
523 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
524 #undef TARGET_MEMORY_MOVE_COST
525 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
526 #undef TARGET_RTX_COSTS
527 #define TARGET_RTX_COSTS ia64_rtx_costs
528 #undef TARGET_ADDRESS_COST
529 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
531 #undef TARGET_UNSPEC_MAY_TRAP_P
532 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
534 #undef TARGET_MACHINE_DEPENDENT_REORG
535 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
537 #undef TARGET_ENCODE_SECTION_INFO
538 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
540 #undef TARGET_SECTION_TYPE_FLAGS
541 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
543 #ifdef HAVE_AS_TLS
544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
546 #endif
548 /* ??? Investigate. */
549 #if 0
550 #undef TARGET_PROMOTE_PROTOTYPES
551 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
552 #endif
554 #undef TARGET_FUNCTION_VALUE
555 #define TARGET_FUNCTION_VALUE ia64_function_value
556 #undef TARGET_LIBCALL_VALUE
557 #define TARGET_LIBCALL_VALUE ia64_libcall_value
558 #undef TARGET_FUNCTION_VALUE_REGNO_P
559 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
561 #undef TARGET_STRUCT_VALUE_RTX
562 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
563 #undef TARGET_RETURN_IN_MEMORY
564 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
565 #undef TARGET_SETUP_INCOMING_VARARGS
566 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
567 #undef TARGET_STRICT_ARGUMENT_NAMING
568 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
571 #undef TARGET_GET_RAW_RESULT_MODE
572 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
573 #undef TARGET_GET_RAW_ARG_MODE
574 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
576 #undef TARGET_MEMBER_TYPE_FORCES_BLK
577 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
579 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
580 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
582 #undef TARGET_ASM_UNWIND_EMIT
583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
589 #undef TARGET_DEBUG_UNWIND_INFO
590 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
592 #undef TARGET_SCALAR_MODE_SUPPORTED_P
593 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
594 #undef TARGET_VECTOR_MODE_SUPPORTED_P
595 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
597 #undef TARGET_LEGITIMATE_CONSTANT_P
598 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
602 #undef TARGET_LRA_P
603 #define TARGET_LRA_P hook_bool_void_false
605 #undef TARGET_CANNOT_FORCE_CONST_MEM
606 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
608 #undef TARGET_MANGLE_TYPE
609 #define TARGET_MANGLE_TYPE ia64_mangle_type
611 #undef TARGET_INVALID_CONVERSION
612 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
613 #undef TARGET_INVALID_UNARY_OP
614 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
615 #undef TARGET_INVALID_BINARY_OP
616 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
618 #undef TARGET_C_MODE_FOR_SUFFIX
619 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
621 #undef TARGET_CAN_ELIMINATE
622 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
624 #undef TARGET_TRAMPOLINE_INIT
625 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
627 #undef TARGET_CAN_USE_DOLOOP_P
628 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
629 #undef TARGET_INVALID_WITHIN_DOLOOP
630 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
632 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
633 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
635 #undef TARGET_PREFERRED_RELOAD_CLASS
636 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
638 #undef TARGET_DELAY_SCHED2
639 #define TARGET_DELAY_SCHED2 true
641 /* Variable tracking should be run after all optimizations which
642 change order of insns. It also needs a valid CFG. */
643 #undef TARGET_DELAY_VARTRACK
644 #define TARGET_DELAY_VARTRACK true
646 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
647 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
649 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
650 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
652 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
653 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
655 struct gcc_target targetm = TARGET_INITIALIZER;
657 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
658 identifier as an argument, so the front end shouldn't look it up. */
660 static bool
661 ia64_attribute_takes_identifier_p (const_tree attr_id)
663 if (is_attribute_p ("model", attr_id))
664 return true;
665 #if TARGET_ABI_OPEN_VMS
666 if (is_attribute_p ("common_object", attr_id))
667 return true;
668 #endif
669 return false;
672 typedef enum
673 {
674 ADDR_AREA_NORMAL, /* normal address area */
675 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
676 }
677 ia64_addr_area;
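/* Illustrative use of the address-area machinery below (not from the
   original source): a hypothetical definition such as

       static int counter __attribute__ ((model (small)));

   is validated by ia64_handle_model_attribute, and ia64_encode_addr_area
   later sets SYMBOL_FLAG_SMALL_ADDR on the variable's symbol so that it
   can be addressed with a single addl.  */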
679 static GTY(()) tree small_ident1;
680 static GTY(()) tree small_ident2;
682 static void
683 init_idents (void)
685 if (small_ident1 == 0)
687 small_ident1 = get_identifier ("small");
688 small_ident2 = get_identifier ("__small__");
692 /* Retrieve the address area that has been chosen for the given decl. */
694 static ia64_addr_area
695 ia64_get_addr_area (tree decl)
697 tree model_attr;
699 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
700 if (model_attr)
702 tree id;
704 init_idents ();
705 id = TREE_VALUE (TREE_VALUE (model_attr));
706 if (id == small_ident1 || id == small_ident2)
707 return ADDR_AREA_SMALL;
709 return ADDR_AREA_NORMAL;
712 static tree
713 ia64_handle_model_attribute (tree *node, tree name, tree args,
714 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
716 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
717 ia64_addr_area area;
718 tree arg, decl = *node;
720 init_idents ();
721 arg = TREE_VALUE (args);
722 if (arg == small_ident1 || arg == small_ident2)
724 addr_area = ADDR_AREA_SMALL;
726 else
728 warning (OPT_Wattributes, "invalid argument of %qE attribute",
729 name);
730 *no_add_attrs = true;
733 switch (TREE_CODE (decl))
735 case VAR_DECL:
736 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
737 == FUNCTION_DECL)
738 && !TREE_STATIC (decl))
740 error_at (DECL_SOURCE_LOCATION (decl),
741 "an address area attribute cannot be specified for "
742 "local variables");
743 *no_add_attrs = true;
745 area = ia64_get_addr_area (decl);
746 if (area != ADDR_AREA_NORMAL && addr_area != area)
748 error ("address area of %q+D conflicts with previous "
749 "declaration", decl);
750 *no_add_attrs = true;
752 break;
754 case FUNCTION_DECL:
755 error_at (DECL_SOURCE_LOCATION (decl),
756 "address area attribute cannot be specified for "
757 "functions");
758 *no_add_attrs = true;
759 break;
761 default:
762 warning (OPT_Wattributes, "%qE attribute ignored",
763 name);
764 *no_add_attrs = true;
765 break;
768 return NULL_TREE;
771 /* Part of the low level implementation of DEC Ada pragma Common_Object which
772 enables the shared use of variables stored in overlaid linker areas
773 corresponding to the use of Fortran COMMON. */
775 static tree
776 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
777 int flags ATTRIBUTE_UNUSED,
778 bool *no_add_attrs)
780 tree decl = *node;
781 tree id;
783 gcc_assert (DECL_P (decl));
785 DECL_COMMON (decl) = 1;
786 id = TREE_VALUE (args);
787 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
789 error ("%qE attribute requires a string constant argument", name);
790 *no_add_attrs = true;
791 return NULL_TREE;
793 return NULL_TREE;
796 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
798 void
799 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
800 unsigned HOST_WIDE_INT size,
801 unsigned int align)
803 tree attr = DECL_ATTRIBUTES (decl);
805 if (attr)
806 attr = lookup_attribute ("common_object", attr);
807 if (attr)
809 tree id = TREE_VALUE (TREE_VALUE (attr));
810 const char *name;
812 if (TREE_CODE (id) == IDENTIFIER_NODE)
813 name = IDENTIFIER_POINTER (id);
814 else if (TREE_CODE (id) == STRING_CST)
815 name = TREE_STRING_POINTER (id);
816 else
817 abort ();
819 fprintf (file, "\t.vms_common\t\"%s\",", name);
821 else
822 fprintf (file, "%s", COMMON_ASM_OP);
824 /* Code from elfos.h. */
825 assemble_name (file, name);
826 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
827 size, align / BITS_PER_UNIT);
829 fputc ('\n', file);
832 static void
833 ia64_encode_addr_area (tree decl, rtx symbol)
835 int flags;
837 flags = SYMBOL_REF_FLAGS (symbol);
838 switch (ia64_get_addr_area (decl))
840 case ADDR_AREA_NORMAL: break;
841 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
842 default: gcc_unreachable ();
844 SYMBOL_REF_FLAGS (symbol) = flags;
847 static void
848 ia64_encode_section_info (tree decl, rtx rtl, int first)
850 default_encode_section_info (decl, rtl, first);
852 /* Careful not to prod global register variables. */
853 if (TREE_CODE (decl) == VAR_DECL
854 && GET_CODE (DECL_RTL (decl)) == MEM
855 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
856 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
857 ia64_encode_addr_area (decl, XEXP (rtl, 0));
860 /* Return 1 if the operands of a move are ok. */
863 ia64_move_ok (rtx dst, rtx src)
865 /* If we're under init_recog_no_volatile, we'll not be able to use
866 memory_operand. So check the code directly and don't worry about
867 the validity of the underlying address, which should have been
868 checked elsewhere anyway. */
869 if (GET_CODE (dst) != MEM)
870 return 1;
871 if (GET_CODE (src) == MEM)
872 return 0;
873 if (register_operand (src, VOIDmode))
874 return 1;
876 /* Otherwise, this must be a constant, and it must be either 0, 0.0 or 1.0. */
877 if (INTEGRAL_MODE_P (GET_MODE (dst)))
878 return src == const0_rtx;
879 else
880 return satisfies_constraint_G (src);
883 /* Return 1 if the operands are ok for a floating point load pair. */
886 ia64_load_pair_ok (rtx dst, rtx src)
888 /* ??? There is a thinko in the implementation of the "x" constraint and the
889 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
890 also return false for it. */
891 if (GET_CODE (dst) != REG
892 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
893 return 0;
894 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
895 return 0;
896 switch (GET_CODE (XEXP (src, 0)))
898 case REG:
899 case POST_INC:
900 break;
901 case POST_DEC:
902 return 0;
903 case POST_MODIFY:
905 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
907 if (GET_CODE (adjust) != CONST_INT
908 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
909 return 0;
911 break;
912 default:
913 abort ();
915 return 1;
919 addp4_optimize_ok (rtx op1, rtx op2)
921 return (basereg_operand (op1, GET_MODE(op1)) !=
922 basereg_operand (op2, GET_MODE(op2)));
925 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
926 Return the length of the field, or <= 0 on failure. */
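/* Worked example (illustrative, not from the original source): with
   ROP = 0x3fc0 and RSHIFT = 6, OP >> SHIFT is 0xff, a solid block of
   eight 1 bits at bit 0, so exact_log2 (0xff + 1) gives a field length
   of 8.  A mask like 0x3f80 shifts down to 0xfe, and exact_log2 (0xff)
   returns -1, i.e. failure.  */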
929 ia64_depz_field_mask (rtx rop, rtx rshift)
931 unsigned HOST_WIDE_INT op = INTVAL (rop);
932 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
934 /* Get rid of the zero bits we're shifting in. */
935 op >>= shift;
937 /* We must now have a solid block of 1's at bit 0. */
938 return exact_log2 (op + 1);
941 /* Return the TLS model to use for ADDR. */
943 static enum tls_model
944 tls_symbolic_operand_type (rtx addr)
946 enum tls_model tls_kind = TLS_MODEL_NONE;
948 if (GET_CODE (addr) == CONST)
950 if (GET_CODE (XEXP (addr, 0)) == PLUS
951 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
952 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
954 else if (GET_CODE (addr) == SYMBOL_REF)
955 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
957 return tls_kind;
960 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
961 as a base register. */
963 static inline bool
964 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
966 if (strict
967 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
968 return true;
969 else if (!strict
970 && (GENERAL_REGNO_P (REGNO (reg))
971 || !HARD_REGISTER_P (reg)))
972 return true;
973 else
974 return false;
977 static bool
978 ia64_legitimate_address_reg (const_rtx reg, bool strict)
980 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
981 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
982 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
983 return true;
985 return false;
988 static bool
989 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
991 if (GET_CODE (disp) == PLUS
992 && rtx_equal_p (reg, XEXP (disp, 0))
993 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
994 || (CONST_INT_P (XEXP (disp, 1))
995 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
996 return true;
998 return false;
1001 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1003 static bool
1004 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1005 rtx x, bool strict)
1007 if (ia64_legitimate_address_reg (x, strict))
1008 return true;
1009 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1010 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1011 && XEXP (x, 0) != arg_pointer_rtx)
1012 return true;
1013 else if (GET_CODE (x) == POST_MODIFY
1014 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1015 && XEXP (x, 0) != arg_pointer_rtx
1016 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1017 return true;
1018 else
1019 return false;
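/* Illustrative addresses accepted above (not from the original source):

       (reg r2)
       (post_inc:DI (reg r2))
       (post_modify:DI (reg r2) (plus:DI (reg r2) (const_int -16)))

   i.e. a plain base register or a base register updated after the
   access, where a constant update must fit the signed 9-bit range
   -256 .. 255 checked in ia64_legitimate_address_disp.  */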
1022 /* Return true if X is a constant that is valid for some immediate
1023 field in an instruction. */
1025 static bool
1026 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1028 switch (GET_CODE (x))
1030 case CONST_INT:
1031 case LABEL_REF:
1032 return true;
1034 case CONST_DOUBLE:
1035 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1036 return true;
1037 return satisfies_constraint_G (x);
1039 case CONST:
1040 case SYMBOL_REF:
1041 /* ??? Short term workaround for PR 28490. We must make the code here
1042 match the code in ia64_expand_move and move_operand, even though they
1043 are both technically wrong. */
1044 if (tls_symbolic_operand_type (x) == 0)
1046 HOST_WIDE_INT addend = 0;
1047 rtx op = x;
1049 if (GET_CODE (op) == CONST
1050 && GET_CODE (XEXP (op, 0)) == PLUS
1051 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1053 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1054 op = XEXP (XEXP (op, 0), 0);
1057 if (any_offset_symbol_operand (op, mode)
1058 || function_operand (op, mode))
1059 return true;
1060 if (aligned_offset_symbol_operand (op, mode))
1061 return (addend & 0x3fff) == 0;
1062 return false;
1064 return false;
1066 case CONST_VECTOR:
1067 if (mode == V2SFmode)
1068 return satisfies_constraint_Y (x);
1070 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1071 && GET_MODE_SIZE (mode) <= 8);
1073 default:
1074 return false;
1078 /* Don't allow TLS addresses to get spilled to memory. */
1080 static bool
1081 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1083 if (mode == RFmode)
1084 return true;
1085 return tls_symbolic_operand_type (x) != 0;
1088 /* Expand a symbolic constant load. */
1090 bool
1091 ia64_expand_load_address (rtx dest, rtx src)
1093 gcc_assert (GET_CODE (dest) == REG);
1095 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1096 having to pointer-extend the value afterward. Other forms of address
1097 computation below are also more natural to compute as 64-bit quantities.
1098 If we've been given an SImode destination register, change it. */
1099 if (GET_MODE (dest) != Pmode)
1100 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1101 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1103 if (TARGET_NO_PIC)
1104 return false;
1105 if (small_addr_symbolic_operand (src, VOIDmode))
1106 return false;
1108 if (TARGET_AUTO_PIC)
1109 emit_insn (gen_load_gprel64 (dest, src));
1110 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1111 emit_insn (gen_load_fptr (dest, src));
1112 else if (sdata_symbolic_operand (src, VOIDmode))
1113 emit_insn (gen_load_gprel (dest, src));
1114 else if (local_symbolic_operand64 (src, VOIDmode))
1116 /* We want to use @gprel rather than @ltoff relocations for local
1117 symbols:
1118 - @gprel does not require dynamic linker
1119 - and does not use .sdata section
1120 https://gcc.gnu.org/bugzilla/60465 */
1121 emit_insn (gen_load_gprel64 (dest, src));
1123 else
1125 HOST_WIDE_INT addend = 0;
1126 rtx tmp;
1128 /* We did split constant offsets in ia64_expand_move, and we did try
1129 to keep them split in move_operand, but we also allowed reload to
1130 rematerialize arbitrary constants rather than spill the value to
1131 the stack and reload it. So we have to be prepared here to split
1132 them apart again. */
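/* Illustrative example (not from the original source): for a constant
   offset of 0x5000 the code below computes
   lo = ((0x5000 & 0x3fff) ^ 0x2000) - 0x2000 = 0x1000 and hi = 0x4000,
   i.e. LO is the offset sign-extended from its low 14 bits (range
   -0x2000 .. 0x1fff) and HI is the remaining multiple of 0x4000 that
   stays attached to the symbolic address.  */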
1133 if (GET_CODE (src) == CONST)
1135 HOST_WIDE_INT hi, lo;
1137 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1138 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1139 hi = hi - lo;
1141 if (lo != 0)
1143 addend = lo;
1144 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1148 tmp = gen_rtx_HIGH (Pmode, src);
1149 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1150 emit_insn (gen_rtx_SET (dest, tmp));
1152 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1153 emit_insn (gen_rtx_SET (dest, tmp));
1155 if (addend)
1157 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1158 emit_insn (gen_rtx_SET (dest, tmp));
1162 return true;
1165 static GTY(()) rtx gen_tls_tga;
1166 static rtx
1167 gen_tls_get_addr (void)
1169 if (!gen_tls_tga)
1170 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1171 return gen_tls_tga;
1174 static GTY(()) rtx thread_pointer_rtx;
1175 static rtx
1176 gen_thread_pointer (void)
1178 if (!thread_pointer_rtx)
1179 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1180 return thread_pointer_rtx;
1183 static rtx
1184 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1185 rtx orig_op1, HOST_WIDE_INT addend)
1187 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1188 rtx_insn *insns;
1189 rtx orig_op0 = op0;
1190 HOST_WIDE_INT addend_lo, addend_hi;
1192 switch (tls_kind)
1194 case TLS_MODEL_GLOBAL_DYNAMIC:
1195 start_sequence ();
1197 tga_op1 = gen_reg_rtx (Pmode);
1198 emit_insn (gen_load_dtpmod (tga_op1, op1));
1200 tga_op2 = gen_reg_rtx (Pmode);
1201 emit_insn (gen_load_dtprel (tga_op2, op1));
1203 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1204 LCT_CONST, Pmode, 2, tga_op1,
1205 Pmode, tga_op2, Pmode);
1207 insns = get_insns ();
1208 end_sequence ();
1210 if (GET_MODE (op0) != Pmode)
1211 op0 = tga_ret;
1212 emit_libcall_block (insns, op0, tga_ret, op1);
1213 break;
1215 case TLS_MODEL_LOCAL_DYNAMIC:
1216 /* ??? This isn't the completely proper way to do local-dynamic.
1217 If the call to __tls_get_addr is used only by a single symbol,
1218 then we should (somehow) move the dtprel to the second arg
1219 to avoid the extra add. */
1220 start_sequence ();
1222 tga_op1 = gen_reg_rtx (Pmode);
1223 emit_insn (gen_load_dtpmod (tga_op1, op1));
1225 tga_op2 = const0_rtx;
1227 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1228 LCT_CONST, Pmode, 2, tga_op1,
1229 Pmode, tga_op2, Pmode);
1231 insns = get_insns ();
1232 end_sequence ();
1234 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1235 UNSPEC_LD_BASE);
1236 tmp = gen_reg_rtx (Pmode);
1237 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1239 if (!register_operand (op0, Pmode))
1240 op0 = gen_reg_rtx (Pmode);
1241 if (TARGET_TLS64)
1243 emit_insn (gen_load_dtprel (op0, op1));
1244 emit_insn (gen_adddi3 (op0, tmp, op0));
1246 else
1247 emit_insn (gen_add_dtprel (op0, op1, tmp));
1248 break;
1250 case TLS_MODEL_INITIAL_EXEC:
1251 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1252 addend_hi = addend - addend_lo;
1254 op1 = plus_constant (Pmode, op1, addend_hi);
1255 addend = addend_lo;
1257 tmp = gen_reg_rtx (Pmode);
1258 emit_insn (gen_load_tprel (tmp, op1));
1260 if (!register_operand (op0, Pmode))
1261 op0 = gen_reg_rtx (Pmode);
1262 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1263 break;
1265 case TLS_MODEL_LOCAL_EXEC:
1266 if (!register_operand (op0, Pmode))
1267 op0 = gen_reg_rtx (Pmode);
1269 op1 = orig_op1;
1270 addend = 0;
1271 if (TARGET_TLS64)
1273 emit_insn (gen_load_tprel (op0, op1));
1274 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1276 else
1277 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1278 break;
1280 default:
1281 gcc_unreachable ();
1284 if (addend)
1285 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1286 orig_op0, 1, OPTAB_DIRECT);
1287 if (orig_op0 == op0)
1288 return NULL_RTX;
1289 if (GET_MODE (orig_op0) == Pmode)
1290 return op0;
1291 return gen_lowpart (GET_MODE (orig_op0), op0);
1295 ia64_expand_move (rtx op0, rtx op1)
1297 machine_mode mode = GET_MODE (op0);
1299 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1300 op1 = force_reg (mode, op1);
1302 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1304 HOST_WIDE_INT addend = 0;
1305 enum tls_model tls_kind;
1306 rtx sym = op1;
1308 if (GET_CODE (op1) == CONST
1309 && GET_CODE (XEXP (op1, 0)) == PLUS
1310 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1312 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1313 sym = XEXP (XEXP (op1, 0), 0);
1316 tls_kind = tls_symbolic_operand_type (sym);
1317 if (tls_kind)
1318 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1320 if (any_offset_symbol_operand (sym, mode))
1321 addend = 0;
1322 else if (aligned_offset_symbol_operand (sym, mode))
1324 HOST_WIDE_INT addend_lo, addend_hi;
1326 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1327 addend_hi = addend - addend_lo;
1329 if (addend_lo != 0)
1331 op1 = plus_constant (mode, sym, addend_hi);
1332 addend = addend_lo;
1334 else
1335 addend = 0;
1337 else
1338 op1 = sym;
1340 if (reload_completed)
1342 /* We really should have taken care of this offset earlier. */
1343 gcc_assert (addend == 0);
1344 if (ia64_expand_load_address (op0, op1))
1345 return NULL_RTX;
1348 if (addend)
1350 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1352 emit_insn (gen_rtx_SET (subtarget, op1));
1354 op1 = expand_simple_binop (mode, PLUS, subtarget,
1355 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1356 if (op0 == op1)
1357 return NULL_RTX;
1361 return op1;
1364 /* Split a move from OP1 to OP0 conditional on COND. */
1366 void
1367 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1369 rtx_insn *insn, *first = get_last_insn ();
1371 emit_move_insn (op0, op1);
1373 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1374 if (INSN_P (insn))
1375 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1376 PATTERN (insn));
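/* Illustrative effect of ia64_emit_cond_move above (not from the
   original source): with COND = (ne (reg:BI p6) (const_int 0)), every
   insn emitted for the move has its pattern rewritten as

       (cond_exec (ne (reg:BI p6) (const_int 0)) (set ... ...))

   so the whole, possibly multi-insn, move executes under that
   predicate.  */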
1379 /* Split a post-reload TImode or TFmode reference into two DImode
1380 components. This is made extra difficult by the fact that we do
1381 not get any scratch registers to work with, because reload cannot
1382 be prevented from giving us a scratch that overlaps the register
1383 pair involved. So instead, when addressing memory, we tweak the
1384 pointer register up and back down with POST_INCs. Or up and not
1385 back down when we can get away with it.
1387 REVERSED is true when the loads must be done in reversed order
1388 (high word first) for correctness. DEAD is true when the pointer
1389 dies with the second insn we generate and therefore the second
1390 address must not carry a postmodify.
1392 May return an insn which is to be emitted after the moves. */
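/* Illustrative example (not from the original source): with REVERSED
   and DEAD both false, a TImode access through (mem:TI (reg:DI r14))
   is split into

       out[0] = (mem:DI (post_inc:DI (reg:DI r14)))   -- low word,  r14 += 8
       out[1] = (mem:DI (post_dec:DI (reg:DI r14)))   -- high word, r14 -= 8

   leaving r14 with its original value; when DEAD is true the second
   access simply omits the postmodify.  */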
1394 static rtx
1395 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1397 rtx fixup = 0;
1399 switch (GET_CODE (in))
1401 case REG:
1402 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1403 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1404 break;
1406 case CONST_INT:
1407 case CONST_DOUBLE:
1408 /* Cannot occur reversed. */
1409 gcc_assert (!reversed);
1411 if (GET_MODE (in) != TFmode)
1412 split_double (in, &out[0], &out[1]);
1413 else
1414 /* split_double does not understand how to split a TFmode
1415 quantity into a pair of DImode constants. */
1417 unsigned HOST_WIDE_INT p[2];
1418 long l[4]; /* TFmode is 128 bits */
1420 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1422 if (FLOAT_WORDS_BIG_ENDIAN)
1424 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1425 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1427 else
1429 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1430 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1432 out[0] = GEN_INT (p[0]);
1433 out[1] = GEN_INT (p[1]);
1435 break;
1437 case MEM:
1439 rtx base = XEXP (in, 0);
1440 rtx offset;
1442 switch (GET_CODE (base))
1444 case REG:
1445 if (!reversed)
1447 out[0] = adjust_automodify_address
1448 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1449 out[1] = adjust_automodify_address
1450 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1452 else
1454 /* Reversal requires a pre-increment, which can only
1455 be done as a separate insn. */
1456 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1457 out[0] = adjust_automodify_address
1458 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1459 out[1] = adjust_address (in, DImode, 0);
1461 break;
1463 case POST_INC:
1464 gcc_assert (!reversed && !dead);
1466 /* Just do the increment in two steps. */
1467 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1468 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1469 break;
1471 case POST_DEC:
1472 gcc_assert (!reversed && !dead);
1474 /* Add 8, subtract 24. */
1475 base = XEXP (base, 0);
1476 out[0] = adjust_automodify_address
1477 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1478 out[1] = adjust_automodify_address
1479 (in, DImode,
1480 gen_rtx_POST_MODIFY (Pmode, base,
1481 plus_constant (Pmode, base, -24)),
1483 break;
1485 case POST_MODIFY:
1486 gcc_assert (!reversed && !dead);
1488 /* Extract and adjust the modification. This case is
1489 trickier than the others, because we might have an
1490 index register, or we might have a combined offset that
1491 doesn't fit a signed 9-bit displacement field. We can
1492 assume the incoming expression is already legitimate. */
1493 offset = XEXP (base, 1);
1494 base = XEXP (base, 0);
1496 out[0] = adjust_automodify_address
1497 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1499 if (GET_CODE (XEXP (offset, 1)) == REG)
1501 /* Can't adjust the postmodify to match. Emit the
1502 original, then a separate addition insn. */
1503 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1504 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1506 else
1508 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1509 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1511 /* Again the postmodify cannot be made to match,
1512 but in this case it's more efficient to get rid
1513 of the postmodify entirely and fix up with an
1514 add insn. */
1515 out[1] = adjust_automodify_address (in, DImode, base, 8);
1516 fixup = gen_adddi3
1517 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1519 else
1521 /* Combined offset still fits in the displacement field.
1522 (We cannot overflow it at the high end.) */
1523 out[1] = adjust_automodify_address
1524 (in, DImode, gen_rtx_POST_MODIFY
1525 (Pmode, base, gen_rtx_PLUS
1526 (Pmode, base,
1527 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1531 break;
1533 default:
1534 gcc_unreachable ();
1536 break;
1539 default:
1540 gcc_unreachable ();
1543 return fixup;
1546 /* Split a TImode or TFmode move instruction after reload.
1547 This is used by *movtf_internal and *movti_internal. */
1548 void
1549 ia64_split_tmode_move (rtx operands[])
1551 rtx in[2], out[2], insn;
1552 rtx fixup[2];
1553 bool dead = false;
1554 bool reversed = false;
1556 /* It is possible for reload to decide to overwrite a pointer with
1557 the value it points to. In that case we have to do the loads in
1558 the appropriate order so that the pointer is not destroyed too
1559 early. Also we must not generate a postmodify for that second
1560 load, or rws_access_regno will die. And we must not generate a
1561 postmodify for the second load if the destination register
1562 overlaps with the base register. */
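/* Illustrative case (not from the original source): with
   operands[0] = (reg:TI r32) and operands[1] = (mem:TI (reg:DI r32)),
   the base register is overwritten by the low half of the result, so
   the checks below set REVERSED (load the high word first) and DEAD
   (the last access must not postmodify the dying pointer).  */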
1563 if (GET_CODE (operands[1]) == MEM
1564 && reg_overlap_mentioned_p (operands[0], operands[1]))
1566 rtx base = XEXP (operands[1], 0);
1567 while (GET_CODE (base) != REG)
1568 base = XEXP (base, 0);
1570 if (REGNO (base) == REGNO (operands[0]))
1571 reversed = true;
1573 if (refers_to_regno_p (REGNO (operands[0]),
1574 REGNO (operands[0])+2,
1575 base, 0))
1576 dead = true;
1578 /* Another reason to do the moves in reversed order is if the first
1579 element of the target register pair is also the second element of
1580 the source register pair. */
1581 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1582 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1583 reversed = true;
1585 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1586 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1588 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1589 if (GET_CODE (EXP) == MEM \
1590 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1591 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1592 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1593 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1595 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1596 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1597 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1599 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1600 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1601 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1603 if (fixup[0])
1604 emit_insn (fixup[0]);
1605 if (fixup[1])
1606 emit_insn (fixup[1]);
1608 #undef MAYBE_ADD_REG_INC_NOTE
1611 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1612 through memory plus an extra GR scratch register. Except that you can
1613 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1614 SECONDARY_RELOAD_CLASS, but not both.
1616 We got into problems in the first place by allowing a construct like
1617 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1618 This solution attempts to prevent this situation from occurring. When
1619 we see something like the above, we spill the inner register to memory. */
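/* Illustrative source-level trigger (not from the original source):
   type-punning through a union such as

       union { long double ld; __int128 i; } u;

   can leave a (subreg:XF (reg:TI ...)) in the RTL;
   spill_xfmode_rfmode_operand below forces such values through a
   16-byte stack temporary instead.  */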
1621 static rtx
1622 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1624 if (GET_CODE (in) == SUBREG
1625 && GET_MODE (SUBREG_REG (in)) == TImode
1626 && GET_CODE (SUBREG_REG (in)) == REG)
1628 rtx memt = assign_stack_temp (TImode, 16);
1629 emit_move_insn (memt, SUBREG_REG (in));
1630 return adjust_address (memt, mode, 0);
1632 else if (force && GET_CODE (in) == REG)
1634 rtx memx = assign_stack_temp (mode, 16);
1635 emit_move_insn (memx, in);
1636 return memx;
1638 else
1639 return in;
1642 /* Expand the movxf or movrf pattern (MODE says which) with the given
1643 OPERANDS, returning true if the pattern should then invoke
1644 DONE. */
1646 bool
1647 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1649 rtx op0 = operands[0];
1651 if (GET_CODE (op0) == SUBREG)
1652 op0 = SUBREG_REG (op0);
1654 /* We must support XFmode loads into general registers for stdarg/vararg,
1655 unprototyped calls, and a rare case where a long double is passed as
1656 an argument after a float HFA fills the FP registers. We split them into
1657 DImode loads for convenience. We also need to support XFmode stores
1658 for the last case. This case does not happen for stdarg/vararg routines,
1659 because we do a block store to memory of unnamed arguments. */
1661 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1663 rtx out[2];
1665 /* We're hoping to transform everything that deals with XFmode
1666 quantities and GR registers early in the compiler. */
1667 gcc_assert (can_create_pseudo_p ());
1669 /* Struct to register can just use TImode instead. */
1670 if ((GET_CODE (operands[1]) == SUBREG
1671 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1672 || (GET_CODE (operands[1]) == REG
1673 && GR_REGNO_P (REGNO (operands[1]))))
1675 rtx op1 = operands[1];
1677 if (GET_CODE (op1) == SUBREG)
1678 op1 = SUBREG_REG (op1);
1679 else
1680 op1 = gen_rtx_REG (TImode, REGNO (op1));
1682 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1683 return true;
1686 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1688 /* Don't word-swap when reading in the constant. */
1689 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1690 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1691 0, mode));
1692 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1693 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1694 0, mode));
1695 return true;
1698 /* If the quantity is in a register not known to be GR, spill it. */
1699 if (register_operand (operands[1], mode))
1700 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1702 gcc_assert (GET_CODE (operands[1]) == MEM);
1704 /* Don't word-swap when reading in the value. */
1705 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1706 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1708 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1709 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1710 return true;
1713 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1715 /* We're hoping to transform everything that deals with XFmode
1716 quantities and GR registers early in the compiler. */
1717 gcc_assert (can_create_pseudo_p ());
1719 /* Op0 can't be a GR_REG here, as that case is handled above.
1720 If op0 is a register, then we spill op1, so that we now have a
1721 MEM operand. This requires creating an XFmode subreg of a TImode reg
1722 to force the spill. */
1723 if (register_operand (operands[0], mode))
1725 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1726 op1 = gen_rtx_SUBREG (mode, op1, 0);
1727 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1730 else
1732 rtx in[2];
1734 gcc_assert (GET_CODE (operands[0]) == MEM);
1736 /* Don't word-swap when writing out the value. */
1737 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1738 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1740 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1741 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1742 return true;
1746 if (!reload_in_progress && !reload_completed)
1748 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1750 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1752 rtx memt, memx, in = operands[1];
1753 if (CONSTANT_P (in))
1754 in = validize_mem (force_const_mem (mode, in));
1755 if (GET_CODE (in) == MEM)
1756 memt = adjust_address (in, TImode, 0);
1757 else
1759 memt = assign_stack_temp (TImode, 16);
1760 memx = adjust_address (memt, mode, 0);
1761 emit_move_insn (memx, in);
1763 emit_move_insn (op0, memt);
1764 return true;
1767 if (!ia64_move_ok (operands[0], operands[1]))
1768 operands[1] = force_reg (mode, operands[1]);
1771 return false;
1774 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1775 with the expression that holds the compare result (in VOIDmode). */
1777 static GTY(()) rtx cmptf_libfunc;
1779 void
1780 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1782 enum rtx_code code = GET_CODE (*expr);
1783 rtx cmp;
1785 /* If we have a BImode input, then we already have a compare result, and
1786 do not need to emit another comparison. */
1787 if (GET_MODE (*op0) == BImode)
1789 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1790 cmp = *op0;
1792 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1793 magic number as its third argument indicating what to do.
1794 The return value is an integer to be compared against zero. */
1795 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1797 enum qfcmp_magic {
1798 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1799 QCMP_UNORD = 2,
1800 QCMP_EQ = 4,
1801 QCMP_LT = 8,
1802 QCMP_GT = 16
1804 int magic;
1805 enum rtx_code ncode;
1806 rtx ret;
1808 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1809 switch (code)
1811 /* 1 = equal, 0 = not equal. Equality operators do
1812 not raise FP_INVALID when given a NaN operand. */
1813 case EQ: magic = QCMP_EQ; ncode = NE; break;
1814 case NE: magic = QCMP_EQ; ncode = EQ; break;
1815 /* isunordered() from C99. */
1816 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1817 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1818 /* Relational operators raise FP_INVALID when given
1819 a NaN operand. */
1820 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1821 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1822 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1823 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1824 /* Unordered relational operators do not raise FP_INVALID
1825 when given a NaN operand. */
1826 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1827 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1828 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1829 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1830 /* Not supported. */
1831 case UNEQ:
1832 case LTGT:
1833 default: gcc_unreachable ();
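/* E.g. code == LT becomes a call _U_Qfcmp (*op0, *op1, QCMP_LT | QCMP_INV),
   after which the DImode result is tested against zero with NE.  */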
1836 start_sequence ();
1838 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1839 *op0, TFmode, *op1, TFmode,
1840 GEN_INT (magic), DImode);
1841 cmp = gen_reg_rtx (BImode);
1842 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1843 ret, const0_rtx)));
1845 rtx_insn *insns = get_insns ();
1846 end_sequence ();
1848 emit_libcall_block (insns, cmp, cmp,
1849 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1850 code = NE;
1852 else
1854 cmp = gen_reg_rtx (BImode);
1855 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1856 code = NE;
1859 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1860 *op0 = cmp;
1861 *op1 = const0_rtx;
1864 /* Generate an integral vector comparison. Return true if the condition has
1865 been reversed, and so the sense of the comparison should be inverted. */
1867 static bool
1868 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1869 rtx dest, rtx op0, rtx op1)
1871 bool negate = false;
1872 rtx x;
1874 /* Canonicalize the comparison to EQ, GT, GTU. */
1875 switch (code)
1877 case EQ:
1878 case GT:
1879 case GTU:
1880 break;
1882 case NE:
1883 case LE:
1884 case LEU:
1885 code = reverse_condition (code);
1886 negate = true;
1887 break;
1889 case GE:
1890 case GEU:
1891 code = reverse_condition (code);
1892 negate = true;
1893 /* FALLTHRU */
1895 case LT:
1896 case LTU:
1897 code = swap_condition (code);
1898 x = op0, op0 = op1, op1 = x;
1899 break;
1901 default:
1902 gcc_unreachable ();
1905 /* Unsigned parallel compare is not supported by the hardware. Play some
1906 tricks to turn this into a signed comparison against 0. */
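/* Two identities are used below: for V2SImode, x >u y iff
   (x - 0x80000000) >s (y - 0x80000000), since subtracting INT_MIN maps
   unsigned order onto signed order; for V8QImode/V4HImode, x >u y iff the
   unsigned saturating difference x -us y is nonzero, so we compare that
   difference against zero with EQ and flip NEGATE.  */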
1907 if (code == GTU)
1909 switch (mode)
1911 case V2SImode:
1913 rtx t1, t2, mask;
1915 /* Subtract (-(INT MAX) - 1) from both operands to make
1916 them signed. */
1917 mask = gen_int_mode (0x80000000, SImode);
1918 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1919 mask = force_reg (mode, mask);
1920 t1 = gen_reg_rtx (mode);
1921 emit_insn (gen_subv2si3 (t1, op0, mask));
1922 t2 = gen_reg_rtx (mode);
1923 emit_insn (gen_subv2si3 (t2, op1, mask));
1924 op0 = t1;
1925 op1 = t2;
1926 code = GT;
1928 break;
1930 case V8QImode:
1931 case V4HImode:
1932 /* Perform a parallel unsigned saturating subtraction. */
1933 x = gen_reg_rtx (mode);
1934 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1936 code = EQ;
1937 op0 = x;
1938 op1 = CONST0_RTX (mode);
1939 negate = !negate;
1940 break;
1942 default:
1943 gcc_unreachable ();
1947 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1948 emit_insn (gen_rtx_SET (dest, x));
1950 return negate;
1953 /* Emit an integral vector conditional move. */
1955 void
1956 ia64_expand_vecint_cmov (rtx operands[])
1958 machine_mode mode = GET_MODE (operands[0]);
1959 enum rtx_code code = GET_CODE (operands[3]);
1960 bool negate;
1961 rtx cmp, x, ot, of;
1963 cmp = gen_reg_rtx (mode);
1964 negate = ia64_expand_vecint_compare (code, mode, cmp,
1965 operands[4], operands[5]);
1967 ot = operands[1+negate];
1968 of = operands[2-negate];
1970 if (ot == CONST0_RTX (mode))
1972 if (of == CONST0_RTX (mode))
1974 emit_move_insn (operands[0], ot);
1975 return;
1978 x = gen_rtx_NOT (mode, cmp);
1979 x = gen_rtx_AND (mode, x, of);
1980 emit_insn (gen_rtx_SET (operands[0], x));
1982 else if (of == CONST0_RTX (mode))
1984 x = gen_rtx_AND (mode, cmp, ot);
1985 emit_insn (gen_rtx_SET (operands[0], x));
1987 else
1989 rtx t, f;
1991 t = gen_reg_rtx (mode);
1992 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1993 emit_insn (gen_rtx_SET (t, x));
1995 f = gen_reg_rtx (mode);
1996 x = gen_rtx_NOT (mode, cmp);
1997 x = gen_rtx_AND (mode, x, operands[2-negate]);
1998 emit_insn (gen_rtx_SET (f, x));
2000 x = gen_rtx_IOR (mode, t, f);
2001 emit_insn (gen_rtx_SET (operands[0], x));
2005 /* Emit an integral vector min or max operation. Return true if all done. */
2007 bool
2008 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2009 rtx operands[])
2011 rtx xops[6];
2013 /* These four combinations are supported directly. */
2014 if (mode == V8QImode && (code == UMIN || code == UMAX))
2015 return false;
2016 if (mode == V4HImode && (code == SMIN || code == SMAX))
2017 return false;
2019 /* This combination can be implemented with only saturating subtraction. */
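/* umax (a, b) == (a -us b) + b: the saturating difference is a - b when
   a >= b and 0 otherwise.  */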
2020 if (mode == V4HImode && code == UMAX)
2022 rtx x, tmp = gen_reg_rtx (mode);
2024 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2025 emit_insn (gen_rtx_SET (tmp, x));
2027 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2028 return true;
2031 /* Everything else is implemented via vector comparisons. */
2032 xops[0] = operands[0];
2033 xops[4] = xops[1] = operands[1];
2034 xops[5] = xops[2] = operands[2];
2036 switch (code)
2038 case UMIN:
2039 code = LTU;
2040 break;
2041 case UMAX:
2042 code = GTU;
2043 break;
2044 case SMIN:
2045 code = LT;
2046 break;
2047 case SMAX:
2048 code = GT;
2049 break;
2050 default:
2051 gcc_unreachable ();
2053 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2055 ia64_expand_vecint_cmov (xops);
2056 return true;
2059 /* The vectors LO and HI each contain N halves of a double-wide vector.
2060 Reassemble either the first N/2 or the second N/2 elements. */
2062 void
2063 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2065 machine_mode vmode = GET_MODE (lo);
2066 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2067 struct expand_vec_perm_d d;
2068 bool ok;
2070 d.target = gen_lowpart (vmode, out);
2071 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2072 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2073 d.vmode = vmode;
2074 d.nelt = nelt;
2075 d.one_operand_p = false;
2076 d.testing_p = false;
2078 high = (highp ? nelt / 2 : 0);
2079 for (i = 0; i < nelt / 2; ++i)
2081 d.perm[i * 2] = i + high;
2082 d.perm[i * 2 + 1] = i + high + nelt;
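/* E.g. for nelt == 8 and HIGHP false this builds the permutation
   { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving the low halves of
   d.op0 and d.op1.  */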
2085 ok = ia64_expand_vec_perm_const_1 (&d);
2086 gcc_assert (ok);
2089 /* Return a vector of the sign-extension of VEC. */
2091 static rtx
2092 ia64_unpack_sign (rtx vec, bool unsignedp)
2094 machine_mode mode = GET_MODE (vec);
2095 rtx zero = CONST0_RTX (mode);
2097 if (unsignedp)
2098 return zero;
2099 else
2101 rtx sign = gen_reg_rtx (mode);
2102 bool neg;
2104 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2105 gcc_assert (!neg);
2107 return sign;
2111 /* Emit an integral vector unpack operation. */
2113 void
2114 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2116 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2117 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2120 /* Emit an integral vector widening sum operation. */
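/* The result is operands[2] plus the widened low half plus the widened
   high half of operands[1], computed in the mode of operands[0].  */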
2122 void
2123 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2125 machine_mode wmode;
2126 rtx l, h, t, sign;
2128 sign = ia64_unpack_sign (operands[1], unsignedp);
2130 wmode = GET_MODE (operands[0]);
2131 l = gen_reg_rtx (wmode);
2132 h = gen_reg_rtx (wmode);
2134 ia64_unpack_assemble (l, operands[1], sign, false);
2135 ia64_unpack_assemble (h, operands[1], sign, true);
2137 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2138 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2139 if (t != operands[0])
2140 emit_move_insn (operands[0], t);
2143 /* Emit the appropriate sequence for a call. */
2145 void
2146 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2147 int sibcall_p)
2149 rtx insn, b0;
2151 addr = XEXP (addr, 0);
2152 addr = convert_memory_address (DImode, addr);
2153 b0 = gen_rtx_REG (DImode, R_BR (0));
2155 /* ??? Should do this for functions known to bind local too. */
2156 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2158 if (sibcall_p)
2159 insn = gen_sibcall_nogp (addr);
2160 else if (! retval)
2161 insn = gen_call_nogp (addr, b0);
2162 else
2163 insn = gen_call_value_nogp (retval, addr, b0);
2164 insn = emit_call_insn (insn);
2166 else
2168 if (sibcall_p)
2169 insn = gen_sibcall_gp (addr);
2170 else if (! retval)
2171 insn = gen_call_gp (addr, b0);
2172 else
2173 insn = gen_call_value_gp (retval, addr, b0);
2174 insn = emit_call_insn (insn);
2176 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2179 if (sibcall_p)
2180 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2182 if (TARGET_ABI_OPEN_VMS)
2183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2184 gen_rtx_REG (DImode, GR_REG (25)));
2187 static void
2188 reg_emitted (enum ia64_frame_regs r)
2190 if (emitted_frame_related_regs[r] == 0)
2191 emitted_frame_related_regs[r] = current_frame_info.r[r];
2192 else
2193 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2196 static int
2197 get_reg (enum ia64_frame_regs r)
2199 reg_emitted (r);
2200 return current_frame_info.r[r];
2203 static bool
2204 is_emitted (int regno)
2206 unsigned int r;
2208 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2209 if (emitted_frame_related_regs[r] == regno)
2210 return true;
2211 return false;
2214 void
2215 ia64_reload_gp (void)
2217 rtx tmp;
2219 if (current_frame_info.r[reg_save_gp])
2221 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2223 else
2225 HOST_WIDE_INT offset;
2226 rtx offset_r;
2228 offset = (current_frame_info.spill_cfa_off
2229 + current_frame_info.spill_size);
2230 if (frame_pointer_needed)
2232 tmp = hard_frame_pointer_rtx;
2233 offset = -offset;
2235 else
2237 tmp = stack_pointer_rtx;
2238 offset = current_frame_info.total_size - offset;
2241 offset_r = GEN_INT (offset);
2242 if (satisfies_constraint_I (offset_r))
2243 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2244 else
2246 emit_move_insn (pic_offset_table_rtx, offset_r);
2247 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2248 pic_offset_table_rtx, tmp));
2251 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2254 emit_move_insn (pic_offset_table_rtx, tmp);
2257 void
2258 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2259 rtx scratch_b, int noreturn_p, int sibcall_p)
2261 rtx insn;
2262 bool is_desc = false;
2264 /* If we find we're calling through a register, then we're actually
2265 calling through a descriptor, so load up the values. */
2266 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2268 rtx tmp;
2269 bool addr_dead_p;
2271 /* ??? We are currently constrained to *not* use peep2, because
2272 we can legitimately change the global lifetime of the GP
2273 (in the form of killing where previously live). This is
2274 because a call through a descriptor doesn't use the previous
2275 value of the GP, while a direct call does, and we do not
2276 commit to either form until the split here.
2278 That said, this means that we lack precise life info for
2279 whether ADDR is dead after this call. This is not terribly
2280 important, since we can fix things up essentially for free
2281 with the POST_DEC below, but it's nice to not use it when we
2282 can immediately tell it's not necessary. */
2283 addr_dead_p = ((noreturn_p || sibcall_p
2284 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2285 REGNO (addr)))
2286 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2288 /* Load the code address into scratch_b. */
2289 tmp = gen_rtx_POST_INC (Pmode, addr);
2290 tmp = gen_rtx_MEM (Pmode, tmp);
2291 emit_move_insn (scratch_r, tmp);
2292 emit_move_insn (scratch_b, scratch_r);
2294 /* Load the GP address. If ADDR is not dead here, then we must
2295 revert the change made above via the POST_INCREMENT. */
2296 if (!addr_dead_p)
2297 tmp = gen_rtx_POST_DEC (Pmode, addr);
2298 else
2299 tmp = addr;
2300 tmp = gen_rtx_MEM (Pmode, tmp);
2301 emit_move_insn (pic_offset_table_rtx, tmp);
2303 is_desc = true;
2304 addr = scratch_b;
2307 if (sibcall_p)
2308 insn = gen_sibcall_nogp (addr);
2309 else if (retval)
2310 insn = gen_call_value_nogp (retval, addr, retaddr);
2311 else
2312 insn = gen_call_nogp (addr, retaddr);
2313 emit_call_insn (insn);
2315 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2316 ia64_reload_gp ();
2319 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2321 This differs from the generic code in that we know about the zero-extending
2322 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2323 also know that ld.acq+cmpxchg.rel equals a full barrier.
2325 The loop we want to generate looks like
2327 cmp_reg = mem;
2328 label:
2329 old_reg = cmp_reg;
2330 new_reg = cmp_reg op val;
2331 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2332 if (cmp_reg != old_reg)
2333 goto label;
2335 Note that we only do the plain load from memory once. Subsequent
2336 iterations use the value loaded by the compare-and-swap pattern. */
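/* For PLUS or MINUS on SImode/DImode with an immediate accepted by
   fetchadd_operand, the special case below emits a single fetchadd.acq
   or fetchadd.rel and avoids the loop altogether.  */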
2338 void
2339 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2340 rtx old_dst, rtx new_dst, enum memmodel model)
2342 machine_mode mode = GET_MODE (mem);
2343 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2344 enum insn_code icode;
2346 /* Special case for using fetchadd. */
2347 if ((mode == SImode || mode == DImode)
2348 && (code == PLUS || code == MINUS)
2349 && fetchadd_operand (val, mode))
2351 if (code == MINUS)
2352 val = GEN_INT (-INTVAL (val));
2354 if (!old_dst)
2355 old_dst = gen_reg_rtx (mode);
2357 switch (model)
2359 case MEMMODEL_ACQ_REL:
2360 case MEMMODEL_SEQ_CST:
2361 case MEMMODEL_SYNC_SEQ_CST:
2362 emit_insn (gen_memory_barrier ());
2363 /* FALLTHRU */
2364 case MEMMODEL_RELAXED:
2365 case MEMMODEL_ACQUIRE:
2366 case MEMMODEL_SYNC_ACQUIRE:
2367 case MEMMODEL_CONSUME:
2368 if (mode == SImode)
2369 icode = CODE_FOR_fetchadd_acq_si;
2370 else
2371 icode = CODE_FOR_fetchadd_acq_di;
2372 break;
2373 case MEMMODEL_RELEASE:
2374 case MEMMODEL_SYNC_RELEASE:
2375 if (mode == SImode)
2376 icode = CODE_FOR_fetchadd_rel_si;
2377 else
2378 icode = CODE_FOR_fetchadd_rel_di;
2379 break;
2381 default:
2382 gcc_unreachable ();
2385 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2387 if (new_dst)
2389 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2390 true, OPTAB_WIDEN);
2391 if (new_reg != new_dst)
2392 emit_move_insn (new_dst, new_reg);
2394 return;
2397 /* Because of the volatile mem read, we get an ld.acq, which is the
2398 front half of the full barrier. The end half is the cmpxchg.rel.
2399 For relaxed and release memory models we don't need this, but we
2400 don't bother trying to prevent it either. */
2401 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2402 || MEM_VOLATILE_P (mem));
2404 old_reg = gen_reg_rtx (DImode);
2405 cmp_reg = gen_reg_rtx (DImode);
2406 label = gen_label_rtx ();
2408 if (mode != DImode)
2410 val = simplify_gen_subreg (DImode, val, mode, 0);
2411 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2413 else
2414 emit_move_insn (cmp_reg, mem);
2416 emit_label (label);
2418 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2419 emit_move_insn (old_reg, cmp_reg);
2420 emit_move_insn (ar_ccv, cmp_reg);
2422 if (old_dst)
2423 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2425 new_reg = cmp_reg;
2426 if (code == NOT)
2428 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2429 true, OPTAB_DIRECT);
2430 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2432 else
2433 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2434 true, OPTAB_DIRECT);
2436 if (mode != DImode)
2437 new_reg = gen_lowpart (mode, new_reg);
2438 if (new_dst)
2439 emit_move_insn (new_dst, new_reg);
2441 switch (model)
2443 case MEMMODEL_RELAXED:
2444 case MEMMODEL_ACQUIRE:
2445 case MEMMODEL_SYNC_ACQUIRE:
2446 case MEMMODEL_CONSUME:
2447 switch (mode)
2449 case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2450 case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2451 case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2452 case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2453 default:
2454 gcc_unreachable ();
2456 break;
2458 case MEMMODEL_RELEASE:
2459 case MEMMODEL_SYNC_RELEASE:
2460 case MEMMODEL_ACQ_REL:
2461 case MEMMODEL_SEQ_CST:
2462 case MEMMODEL_SYNC_SEQ_CST:
2463 switch (mode)
2465 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2466 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2467 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2468 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2469 default:
2470 gcc_unreachable ();
2472 break;
2474 default:
2475 gcc_unreachable ();
2478 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
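/* CMP_REG now holds the value that was actually found in memory; loop
   back if it differs from the value we used as ar.ccv.  */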
2480 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2483 /* Begin the assembly file. */
2485 static void
2486 ia64_file_start (void)
2488 default_file_start ();
2489 emit_safe_across_calls ();
2492 void
2493 emit_safe_across_calls (void)
2495 unsigned int rs, re;
2496 int out_state;
2498 rs = 1;
2499 out_state = 0;
2500 while (1)
2502 while (rs < 64 && call_used_regs[PR_REG (rs)])
2503 rs++;
2504 if (rs >= 64)
2505 break;
2506 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2507 continue;
2508 if (out_state == 0)
2510 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2511 out_state = 1;
2513 else
2514 fputc (',', asm_out_file);
2515 if (re == rs + 1)
2516 fprintf (asm_out_file, "p%u", rs);
2517 else
2518 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2519 rs = re + 1;
2521 if (out_state)
2522 fputc ('\n', asm_out_file);
2525 /* Globalize a declaration. */
2527 static void
2528 ia64_globalize_decl_name (FILE * stream, tree decl)
2530 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2531 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2532 if (version_attr)
2534 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2535 const char *p = TREE_STRING_POINTER (v);
2536 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2538 targetm.asm_out.globalize_label (stream, name);
2539 if (TREE_CODE (decl) == FUNCTION_DECL)
2540 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2543 /* Helper function for ia64_compute_frame_size: find an appropriate general
2544 register to spill some special register to.  Bits for GR0 to GR31 already
2545 allocated by this routine are recorded in current_frame_info.gr_used_mask.
2546 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2548 static int
2549 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2551 int regno;
2553 if (emitted_frame_related_regs[r] != 0)
2555 regno = emitted_frame_related_regs[r];
2556 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2557 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2558 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2559 else if (crtl->is_leaf
2560 && regno >= GR_REG (1) && regno <= GR_REG (31))
2561 current_frame_info.gr_used_mask |= 1 << regno;
2563 return regno;
2566 /* If this is a leaf function, first try an otherwise unused
2567 call-clobbered register. */
2568 if (crtl->is_leaf)
2570 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2571 if (! df_regs_ever_live_p (regno)
2572 && call_used_regs[regno]
2573 && ! fixed_regs[regno]
2574 && ! global_regs[regno]
2575 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2576 && ! is_emitted (regno))
2578 current_frame_info.gr_used_mask |= 1 << regno;
2579 return regno;
2583 if (try_locals)
2585 regno = current_frame_info.n_local_regs;
2586 /* If there is a frame pointer, then we can't use loc79, because
2587 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2588 reg_name switching code in ia64_expand_prologue. */
2589 while (regno < (80 - frame_pointer_needed))
2590 if (! is_emitted (LOC_REG (regno++)))
2592 current_frame_info.n_local_regs = regno;
2593 return LOC_REG (regno - 1);
2597 /* Failed to find a general register to spill to. Must use stack. */
2598 return 0;
2601 /* In order to make for nice schedules, we try to allocate every temporary
2602 to a different register. We must of course stay away from call-saved,
2603 fixed, and global registers. We must also stay away from registers
2604 allocated in current_frame_info.gr_used_mask, since those include regs
2605 used all through the prologue.
2607 Any register allocated here must be used immediately. The idea is to
2608 aid scheduling, not to solve data flow problems. */
2610 static int last_scratch_gr_reg;
2612 static int
2613 next_scratch_gr_reg (void)
2615 int i, regno;
2617 for (i = 0; i < 32; ++i)
2619 regno = (last_scratch_gr_reg + i + 1) & 31;
2620 if (call_used_regs[regno]
2621 && ! fixed_regs[regno]
2622 && ! global_regs[regno]
2623 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2625 last_scratch_gr_reg = regno;
2626 return regno;
2630 /* There must be _something_ available. */
2631 gcc_unreachable ();
2634 /* Helper function for ia64_compute_frame_size, called through
2635 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2637 static void
2638 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2640 unsigned int regno = REGNO (reg);
2641 if (regno < 32)
2643 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2644 for (i = 0; i < n; ++i)
2645 current_frame_info.gr_used_mask |= 1 << (regno + i);
2650 /* Compute the frame layout for the current function and record it in
2651 current_frame_info.  SIZE is the number of bytes of space needed for
2652 local variables. */
2654 static void
2655 ia64_compute_frame_size (HOST_WIDE_INT size)
2657 HOST_WIDE_INT total_size;
2658 HOST_WIDE_INT spill_size = 0;
2659 HOST_WIDE_INT extra_spill_size = 0;
2660 HOST_WIDE_INT pretend_args_size;
2661 HARD_REG_SET mask;
2662 int n_spilled = 0;
2663 int spilled_gr_p = 0;
2664 int spilled_fr_p = 0;
2665 unsigned int regno;
2666 int min_regno;
2667 int max_regno;
2668 int i;
2670 if (current_frame_info.initialized)
2671 return;
2673 memset (&current_frame_info, 0, sizeof current_frame_info);
2674 CLEAR_HARD_REG_SET (mask);
2676 /* Don't allocate scratches to the return register. */
2677 diddle_return_value (mark_reg_gr_used_mask, NULL);
2679 /* Don't allocate scratches to the EH scratch registers. */
2680 if (cfun->machine->ia64_eh_epilogue_sp)
2681 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2682 if (cfun->machine->ia64_eh_epilogue_bsp)
2683 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2685 /* Static stack checking uses r2 and r3. */
2686 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2687 current_frame_info.gr_used_mask |= 0xc;
2689 /* Find the size of the register stack frame. We have only 80 local
2690 registers, because we reserve 8 for the inputs and 8 for the
2691 outputs. */
2693 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2694 since we'll be adjusting that down later. */
2695 regno = LOC_REG (78) + ! frame_pointer_needed;
2696 for (; regno >= LOC_REG (0); regno--)
2697 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2698 break;
2699 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2701 /* For functions marked with the syscall_linkage attribute, we must mark
2702 all eight input registers as in use, so that locals aren't visible to
2703 the caller. */
2705 if (cfun->machine->n_varargs > 0
2706 || lookup_attribute ("syscall_linkage",
2707 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2708 current_frame_info.n_input_regs = 8;
2709 else
2711 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2712 if (df_regs_ever_live_p (regno))
2713 break;
2714 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2717 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2718 if (df_regs_ever_live_p (regno))
2719 break;
2720 i = regno - OUT_REG (0) + 1;
2722 #ifndef PROFILE_HOOK
2723 /* When -p profiling, we need one output register for the mcount argument.
2724 Likewise for -a profiling for the bb_init_func argument. For -ax
2725 profiling, we need two output registers for the two bb_init_trace_func
2726 arguments. */
2727 if (crtl->profile)
2728 i = MAX (i, 1);
2729 #endif
2730 current_frame_info.n_output_regs = i;
2732 /* ??? No rotating register support yet. */
2733 current_frame_info.n_rotate_regs = 0;
2735 /* Discover which registers need spilling, and how much room that
2736 will take. Begin with floating point and general registers,
2737 which will always wind up on the stack. */
2739 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2740 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2742 SET_HARD_REG_BIT (mask, regno);
2743 spill_size += 16;
2744 n_spilled += 1;
2745 spilled_fr_p = 1;
2748 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2751 SET_HARD_REG_BIT (mask, regno);
2752 spill_size += 8;
2753 n_spilled += 1;
2754 spilled_gr_p = 1;
2757 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2758 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2760 SET_HARD_REG_BIT (mask, regno);
2761 spill_size += 8;
2762 n_spilled += 1;
2765 /* Now come all special registers that might get saved in other
2766 general registers. */
2768 if (frame_pointer_needed)
2770 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2771 /* If we did not get a register, then we take LOC79. This is guaranteed
2772 to be free, even if regs_ever_live is already set, because this is
2773 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2774 as we don't count loc79 above. */
2775 if (current_frame_info.r[reg_fp] == 0)
2777 current_frame_info.r[reg_fp] = LOC_REG (79);
2778 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2782 if (! crtl->is_leaf)
2784 /* Emit a save of BR0 if we call other functions. Do this even
2785 if this function doesn't return, as EH depends on this to be
2786 able to unwind the stack. */
2787 SET_HARD_REG_BIT (mask, BR_REG (0));
2789 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2790 if (current_frame_info.r[reg_save_b0] == 0)
2792 extra_spill_size += 8;
2793 n_spilled += 1;
2796 /* Similarly for ar.pfs. */
2797 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2798 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2799 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2801 extra_spill_size += 8;
2802 n_spilled += 1;
2805 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2806 registers are clobbered, so we fall back to the stack. */
2807 current_frame_info.r[reg_save_gp]
2808 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2809 if (current_frame_info.r[reg_save_gp] == 0)
2811 SET_HARD_REG_BIT (mask, GR_REG (1));
2812 spill_size += 8;
2813 n_spilled += 1;
2816 else
2818 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2820 SET_HARD_REG_BIT (mask, BR_REG (0));
2821 extra_spill_size += 8;
2822 n_spilled += 1;
2825 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2827 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2828 current_frame_info.r[reg_save_ar_pfs]
2829 = find_gr_spill (reg_save_ar_pfs, 1);
2830 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2832 extra_spill_size += 8;
2833 n_spilled += 1;
2838 /* Unwind descriptor hackery: things are most efficient if we allocate
2839 consecutive GR save registers for RP, PFS, FP in that order. However,
2840 it is absolutely critical that FP get the only hard register that's
2841 guaranteed to be free, so we allocated it first. If all three did
2842 happen to be allocated hard regs, and are consecutive, rearrange them
2843 into the preferred order now.
2845 If we have already emitted code for any of those registers,
2846 then it's already too late to change. */
2847 min_regno = MIN (current_frame_info.r[reg_fp],
2848 MIN (current_frame_info.r[reg_save_b0],
2849 current_frame_info.r[reg_save_ar_pfs]));
2850 max_regno = MAX (current_frame_info.r[reg_fp],
2851 MAX (current_frame_info.r[reg_save_b0],
2852 current_frame_info.r[reg_save_ar_pfs]));
2853 if (min_regno > 0
2854 && min_regno + 2 == max_regno
2855 && (current_frame_info.r[reg_fp] == min_regno + 1
2856 || current_frame_info.r[reg_save_b0] == min_regno + 1
2857 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2858 && (emitted_frame_related_regs[reg_save_b0] == 0
2859 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2860 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2861 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2862 && (emitted_frame_related_regs[reg_fp] == 0
2863 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2865 current_frame_info.r[reg_save_b0] = min_regno;
2866 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2867 current_frame_info.r[reg_fp] = min_regno + 2;
2870 /* See if we need to store the predicate register block. */
2871 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2872 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2873 break;
2874 if (regno <= PR_REG (63))
2876 SET_HARD_REG_BIT (mask, PR_REG (0));
2877 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2878 if (current_frame_info.r[reg_save_pr] == 0)
2880 extra_spill_size += 8;
2881 n_spilled += 1;
2884 /* ??? Mark them all as used so that register renaming and such
2885 are free to use them. */
2886 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2887 df_set_regs_ever_live (regno, true);
2890 /* If we're forced to use st8.spill, we're forced to save and restore
2891 ar.unat as well. The check for existing liveness allows inline asm
2892 to touch ar.unat. */
2893 if (spilled_gr_p || cfun->machine->n_varargs
2894 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2896 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2897 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2898 current_frame_info.r[reg_save_ar_unat]
2899 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2900 if (current_frame_info.r[reg_save_ar_unat] == 0)
2902 extra_spill_size += 8;
2903 n_spilled += 1;
2907 if (df_regs_ever_live_p (AR_LC_REGNUM))
2909 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2910 current_frame_info.r[reg_save_ar_lc]
2911 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2912 if (current_frame_info.r[reg_save_ar_lc] == 0)
2914 extra_spill_size += 8;
2915 n_spilled += 1;
2919 /* If we have an odd number of words of pretend arguments written to
2920 the stack, then the FR save area will be unaligned. We round the
2921 size of this area up to keep things 16 byte aligned. */
2922 if (spilled_fr_p)
2923 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2924 else
2925 pretend_args_size = crtl->args.pretend_args_size;
2927 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2928 + crtl->outgoing_args_size);
2929 total_size = IA64_STACK_ALIGN (total_size);
2931 /* We always use the 16-byte scratch area provided by the caller, but
2932 if we are a leaf function, there's no one to which we need to provide
2933 a scratch area. However, if the function allocates dynamic stack space,
2934 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2935 so we need to cope. */
2936 if (crtl->is_leaf && !cfun->calls_alloca)
2937 total_size = MAX (0, total_size - 16);
2939 current_frame_info.total_size = total_size;
2940 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2941 current_frame_info.spill_size = spill_size;
2942 current_frame_info.extra_spill_size = extra_spill_size;
2943 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2944 current_frame_info.n_spilled = n_spilled;
2945 current_frame_info.initialized = reload_completed;
2948 /* Worker function for TARGET_CAN_ELIMINATE. */
2950 bool
2951 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2953 return (to == BR_REG (0) ? crtl->is_leaf : true);
2956 /* Compute the initial difference between the specified pair of registers. */
2958 HOST_WIDE_INT
2959 ia64_initial_elimination_offset (int from, int to)
2961 HOST_WIDE_INT offset;
2963 ia64_compute_frame_size (get_frame_size ());
2964 switch (from)
2966 case FRAME_POINTER_REGNUM:
2967 switch (to)
2969 case HARD_FRAME_POINTER_REGNUM:
2970 offset = -current_frame_info.total_size;
2971 if (!crtl->is_leaf || cfun->calls_alloca)
2972 offset += 16 + crtl->outgoing_args_size;
2973 break;
2975 case STACK_POINTER_REGNUM:
2976 offset = 0;
2977 if (!crtl->is_leaf || cfun->calls_alloca)
2978 offset += 16 + crtl->outgoing_args_size;
2979 break;
2981 default:
2982 gcc_unreachable ();
2984 break;
2986 case ARG_POINTER_REGNUM:
2987 /* Arguments start above the 16 byte save area, unless stdarg
2988 in which case we store through the 16 byte save area. */
2989 switch (to)
2991 case HARD_FRAME_POINTER_REGNUM:
2992 offset = 16 - crtl->args.pretend_args_size;
2993 break;
2995 case STACK_POINTER_REGNUM:
2996 offset = (current_frame_info.total_size
2997 + 16 - crtl->args.pretend_args_size);
2998 break;
3000 default:
3001 gcc_unreachable ();
3003 break;
3005 default:
3006 gcc_unreachable ();
3009 return offset;
3012 /* If there are more than a trivial number of register spills, we use
3013 two interleaved iterators so that we can get two memory references
3014 per insn group.
3016 In order to simplify things in the prologue and epilogue expanders,
3017 we use helper functions to fix up the memory references after the
3018 fact with the appropriate offsets to a POST_MODIFY memory mode.
3019 The following data structure tracks the state of the two iterators
3020 while insns are being emitted. */
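/* A single iterator is used for one or two spills; setup_spill_pointers
   allocates a second iterator register only when there are more than two
   spills.  */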
3022 struct spill_fill_data
3024 rtx_insn *init_after; /* point at which to emit initializations */
3025 rtx init_reg[2]; /* initial base register */
3026 rtx iter_reg[2]; /* the iterator registers */
3027 rtx *prev_addr[2]; /* address of last memory use */
3028 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3029 HOST_WIDE_INT prev_off[2]; /* last offset */
3030 int n_iter; /* number of iterators in use */
3031 int next_iter; /* next iterator to use */
3032 unsigned int save_gr_used_mask;
3035 static struct spill_fill_data spill_fill_data;
3037 static void
3038 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3040 int i;
3042 spill_fill_data.init_after = get_last_insn ();
3043 spill_fill_data.init_reg[0] = init_reg;
3044 spill_fill_data.init_reg[1] = init_reg;
3045 spill_fill_data.prev_addr[0] = NULL;
3046 spill_fill_data.prev_addr[1] = NULL;
3047 spill_fill_data.prev_insn[0] = NULL;
3048 spill_fill_data.prev_insn[1] = NULL;
3049 spill_fill_data.prev_off[0] = cfa_off;
3050 spill_fill_data.prev_off[1] = cfa_off;
3051 spill_fill_data.next_iter = 0;
3052 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3054 spill_fill_data.n_iter = 1 + (n_spills > 2);
3055 for (i = 0; i < spill_fill_data.n_iter; ++i)
3057 int regno = next_scratch_gr_reg ();
3058 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3059 current_frame_info.gr_used_mask |= 1 << regno;
3063 static void
3064 finish_spill_pointers (void)
3066 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3069 static rtx
3070 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3072 int iter = spill_fill_data.next_iter;
3073 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3074 rtx disp_rtx = GEN_INT (disp);
3075 rtx mem;
3077 if (spill_fill_data.prev_addr[iter])
3079 if (satisfies_constraint_N (disp_rtx))
3081 *spill_fill_data.prev_addr[iter]
3082 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3083 gen_rtx_PLUS (DImode,
3084 spill_fill_data.iter_reg[iter],
3085 disp_rtx));
3086 add_reg_note (spill_fill_data.prev_insn[iter],
3087 REG_INC, spill_fill_data.iter_reg[iter]);
3089 else
3091 /* ??? Could use register post_modify for loads. */
3092 if (!satisfies_constraint_I (disp_rtx))
3094 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3095 emit_move_insn (tmp, disp_rtx);
3096 disp_rtx = tmp;
3098 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3099 spill_fill_data.iter_reg[iter], disp_rtx));
3102 /* Micro-optimization: if we've created a frame pointer, it's at
3103 CFA 0, which may allow the real iterator to be initialized lower,
3104 slightly increasing parallelism. Also, if there are few saves
3105 it may eliminate the iterator entirely. */
3106 else if (disp == 0
3107 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3108 && frame_pointer_needed)
3110 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3111 set_mem_alias_set (mem, get_varargs_alias_set ());
3112 return mem;
3114 else
3116 rtx seq;
3117 rtx_insn *insn;
3119 if (disp == 0)
3120 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3121 spill_fill_data.init_reg[iter]);
3122 else
3124 start_sequence ();
3126 if (!satisfies_constraint_I (disp_rtx))
3128 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3129 emit_move_insn (tmp, disp_rtx);
3130 disp_rtx = tmp;
3133 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3134 spill_fill_data.init_reg[iter],
3135 disp_rtx));
3137 seq = get_insns ();
3138 end_sequence ();
3141 /* Take care: this may be the very first insn in the function. */
3142 if (spill_fill_data.init_after)
3143 insn = emit_insn_after (seq, spill_fill_data.init_after);
3144 else
3146 rtx_insn *first = get_insns ();
3147 if (first)
3148 insn = emit_insn_before (seq, first);
3149 else
3150 insn = emit_insn (seq);
3152 spill_fill_data.init_after = insn;
3155 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3157 /* ??? Not all of the spills are for varargs, but some of them are.
3158 The rest of the spills belong in an alias set of their own. But
3159 it doesn't actually hurt to include them here. */
3160 set_mem_alias_set (mem, get_varargs_alias_set ());
3162 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3163 spill_fill_data.prev_off[iter] = cfa_off;
3165 if (++iter >= spill_fill_data.n_iter)
3166 iter = 0;
3167 spill_fill_data.next_iter = iter;
3169 return mem;
3172 static void
3173 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3174 rtx frame_reg)
3176 int iter = spill_fill_data.next_iter;
3177 rtx mem;
3178 rtx_insn *insn;
3180 mem = spill_restore_mem (reg, cfa_off);
3181 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3182 spill_fill_data.prev_insn[iter] = insn;
3184 if (frame_reg)
3186 rtx base;
3187 HOST_WIDE_INT off;
3189 RTX_FRAME_RELATED_P (insn) = 1;
3191 /* Don't even pretend that the unwind code can intuit its way
3192 through a pair of interleaved post_modify iterators. Just
3193 provide the correct answer. */
3195 if (frame_pointer_needed)
3197 base = hard_frame_pointer_rtx;
3198 off = - cfa_off;
3200 else
3202 base = stack_pointer_rtx;
3203 off = current_frame_info.total_size - cfa_off;
3206 add_reg_note (insn, REG_CFA_OFFSET,
3207 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3208 plus_constant (Pmode,
3209 base, off)),
3210 frame_reg));
3214 static void
3215 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3217 int iter = spill_fill_data.next_iter;
3218 rtx_insn *insn;
3220 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3221 GEN_INT (cfa_off)));
3222 spill_fill_data.prev_insn[iter] = insn;
3225 /* Wrapper functions that discard the CONST_INT spill offset. These
3226 exist so that we can give gr_spill/gr_fill the offset they need and
3227 use a consistent function interface. */
3229 static rtx
3230 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3232 return gen_movdi (dest, src);
3235 static rtx
3236 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3238 return gen_fr_spill (dest, src);
3241 static rtx
3242 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3244 return gen_fr_restore (dest, src);
3247 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3249 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3250 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
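/* Each stacked register occupies 8 bytes, and the RSE stores an extra NaT
   collection word for every 63 registers; the trailing + 1 leaves room for
   one more collection slot in the worst case.  E.g. N == 96 gives
   (96 + 1 + 1) * 8 == 784 bytes.  */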
3252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3253 inclusive. These are offsets from the current stack pointer. BS_SIZE
3254 is the size of the backing store. ??? This clobbers r2 and r3. */
3256 static void
3257 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3258 int bs_size)
3260 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3261 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3262 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3264 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3265 of the Register Stack Engine. We also need to probe it after checking
3266 that the 2 stacks don't overlap. */
3267 emit_insn (gen_bsp_value (r3));
3268 emit_move_insn (r2, GEN_INT (-(first + size)));
3270 /* Compare current value of BSP and SP registers. */
3271 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3272 r3, stack_pointer_rtx)));
3274 /* Compute the address of the probe for the Backing Store (which grows
3275 towards higher addresses). We probe only at the first offset of
3276 the next page because some OSes (e.g. Linux/ia64) only extend the
3277 backing store when this specific address is hit (but generate a SEGV
3278 on other addresses). Page size is the worst case (4KB). The reserve
3279 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3280 Also compute the address of the last probe for the memory stack
3281 (which grows towards lower addresses). */
3282 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3283 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3285 /* Compare them and raise SEGV if the former has topped the latter. */
3286 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3287 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3288 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3289 r3, r2))));
3290 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3291 const0_rtx),
3292 const0_rtx));
3293 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3294 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3295 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3296 GEN_INT (11))));
3298 /* Probe the Backing Store if necessary. */
3299 if (bs_size > 0)
3300 emit_stack_probe (r3);
3302 /* Probe the memory stack if necessary. */
3303 if (size == 0)
3306 /* See if we have a constant small number of probes to generate. If so,
3307 that's the easy case. */
3308 else if (size <= PROBE_INTERVAL)
3309 emit_stack_probe (r2);
3311 /* The run-time loop is made up of 9 insns in the generic case while this
3312 compile-time loop is made up of 5+2*(n-2) insns, where n is the number of intervals. */
3313 else if (size <= 4 * PROBE_INTERVAL)
3315 HOST_WIDE_INT i;
3317 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3318 emit_insn (gen_rtx_SET (r2,
3319 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3320 emit_stack_probe (r2);
3322 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3323 it exceeds SIZE. If only two probes are needed, this will not
3324 generate any code. Then probe at FIRST + SIZE. */
3325 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3327 emit_insn (gen_rtx_SET (r2,
3328 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3329 emit_stack_probe (r2);
3332 emit_insn (gen_rtx_SET (r2,
3333 plus_constant (Pmode, r2,
3334 (i - PROBE_INTERVAL) - size)));
3335 emit_stack_probe (r2);
3338 /* Otherwise, do the same as above, but in a loop. Note that we must be
3339 extra careful with variables wrapping around because we might be at
3340 the very top (or the very bottom) of the address space and we have
3341 to be able to handle this case properly; in particular, we use an
3342 equality test for the loop condition. */
3343 else
3345 HOST_WIDE_INT rounded_size;
3347 emit_move_insn (r2, GEN_INT (-first));
3350 /* Step 1: round SIZE to the previous multiple of the interval. */
3352 rounded_size = size & -PROBE_INTERVAL;
3355 /* Step 2: compute initial and final value of the loop counter. */
3357 /* TEST_ADDR = SP + FIRST. */
3358 emit_insn (gen_rtx_SET (r2,
3359 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3361 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3362 if (rounded_size > (1 << 21))
3364 emit_move_insn (r3, GEN_INT (-rounded_size));
3365 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3367 else
3368 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3369 GEN_INT (-rounded_size))));
3372 /* Step 3: the loop
3376 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3377 probe at TEST_ADDR
3379 while (TEST_ADDR != LAST_ADDR)
3381 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3382 until it is equal to ROUNDED_SIZE. */
3384 emit_insn (gen_probe_stack_range (r2, r2, r3));
3387 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3388 that SIZE is equal to ROUNDED_SIZE. */
3390 /* TEMP = SIZE - ROUNDED_SIZE. */
3391 if (size != rounded_size)
3393 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3394 rounded_size - size)));
3395 emit_stack_probe (r2);
3399 /* Make sure nothing is scheduled before we are done. */
3400 emit_insn (gen_blockage ());
3403 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3404 absolute addresses. */
3406 const char *
3407 output_probe_stack_range (rtx reg1, rtx reg2)
3409 static int labelno = 0;
3410 char loop_lab[32];
3411 rtx xops[3];
3413 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3415 /* Loop. */
3416 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3418 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3419 xops[0] = reg1;
3420 xops[1] = GEN_INT (-PROBE_INTERVAL);
3421 output_asm_insn ("addl %0 = %1, %0", xops);
3422 fputs ("\t;;\n", asm_out_file);
3424 /* Probe at TEST_ADDR. */
3425 output_asm_insn ("probe.w.fault %0, 0", xops);
3427 /* Test if TEST_ADDR == LAST_ADDR. */
3428 xops[1] = reg2;
3429 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3430 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3432 /* Branch. */
3433 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3434 assemble_name_raw (asm_out_file, loop_lab);
3435 fputc ('\n', asm_out_file);
3437 return "";
3440 /* Called after register allocation to add any instructions needed for the
3441 prologue. Using a prologue insn is favored compared to putting all of the
3442 instructions in output_function_prologue(), since it allows the scheduler
3443 to intermix instructions with the saves of the caller saved registers. In
3444 some cases, it might be necessary to emit a barrier instruction as the last
3445 insn to prevent such scheduling.
3447 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3448 so that the debug info generation code can handle them properly.
3450 The register save area is laid out like so:
3451 cfa+16
3452 [ varargs spill area ]
3453 [ fr register spill area ]
3454 [ br register spill area ]
3455 [ ar register spill area ]
3456 [ pr register spill area ]
3457 [ gr register spill area ] */
3459 /* ??? Get inefficient code when the frame size is larger than can fit in an
3460 adds instruction. */
3462 void
3463 ia64_expand_prologue (void)
3465 rtx_insn *insn;
3466 rtx ar_pfs_save_reg, ar_unat_save_reg;
3467 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3468 rtx reg, alt_reg;
3470 ia64_compute_frame_size (get_frame_size ());
3471 last_scratch_gr_reg = 15;
3473 if (flag_stack_usage_info)
3474 current_function_static_stack_size = current_frame_info.total_size;
3476 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3478 HOST_WIDE_INT size = current_frame_info.total_size;
3479 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3480 + current_frame_info.n_local_regs);
3482 if (crtl->is_leaf && !cfun->calls_alloca)
3484 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3485 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3486 size - STACK_CHECK_PROTECT,
3487 bs_size);
3488 else if (size + bs_size > STACK_CHECK_PROTECT)
3489 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3491 else if (size + bs_size > 0)
3492 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3495 if (dump_file)
3497 fprintf (dump_file, "ia64 frame related registers "
3498 "recorded in current_frame_info.r[]:\n");
3499 #define PRINTREG(a) if (current_frame_info.r[a]) \
3500 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3501 PRINTREG(reg_fp);
3502 PRINTREG(reg_save_b0);
3503 PRINTREG(reg_save_pr);
3504 PRINTREG(reg_save_ar_pfs);
3505 PRINTREG(reg_save_ar_unat);
3506 PRINTREG(reg_save_ar_lc);
3507 PRINTREG(reg_save_gp);
3508 #undef PRINTREG
3511 /* If there is no epilogue, then we don't need some prologue insns.
3512 We need to avoid emitting the dead prologue insns, because flow
3513 will complain about them. */
3514 if (optimize)
3516 edge e;
3517 edge_iterator ei;
3519 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3520 if ((e->flags & EDGE_FAKE) == 0
3521 && (e->flags & EDGE_FALLTHRU) != 0)
3522 break;
3523 epilogue_p = (e != NULL);
3525 else
3526 epilogue_p = 1;
3528 /* Set the local, input, and output register names. We need to do this
3529 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3530 half. If we use in/loc/out register names, then we get assembler errors
3531 in crtn.S because there is no alloc insn or regstk directive in there. */
3532 if (! TARGET_REG_NAMES)
3534 int inputs = current_frame_info.n_input_regs;
3535 int locals = current_frame_info.n_local_regs;
3536 int outputs = current_frame_info.n_output_regs;
3538 for (i = 0; i < inputs; i++)
3539 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3540 for (i = 0; i < locals; i++)
3541 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3542 for (i = 0; i < outputs; i++)
3543 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3546 /* Set the frame pointer register name. The regnum is logically loc79,
3547 but of course we'll not have allocated that many locals. Rather than
3548 worrying about renumbering the existing rtxs, we adjust the name. */
3549 /* ??? This code means that we can never use one local register when
3550 there is a frame pointer. loc79 gets wasted in this case, as it is
3551 renamed to a register that will never be used. See also the try_locals
3552 code in find_gr_spill. */
3553 if (current_frame_info.r[reg_fp])
3555 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3556 reg_names[HARD_FRAME_POINTER_REGNUM]
3557 = reg_names[current_frame_info.r[reg_fp]];
3558 reg_names[current_frame_info.r[reg_fp]] = tmp;
3561 /* We don't need an alloc instruction if we've used no outputs or locals. */
3562 if (current_frame_info.n_local_regs == 0
3563 && current_frame_info.n_output_regs == 0
3564 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3565 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3567 /* If there is no alloc, but there are input registers used, then we
3568 need a .regstk directive. */
3569 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3570 ar_pfs_save_reg = NULL_RTX;
3572 else
3574 current_frame_info.need_regstk = 0;
3576 if (current_frame_info.r[reg_save_ar_pfs])
3578 regno = current_frame_info.r[reg_save_ar_pfs];
3579 reg_emitted (reg_save_ar_pfs);
3581 else
3582 regno = next_scratch_gr_reg ();
3583 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3585 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3586 GEN_INT (current_frame_info.n_input_regs),
3587 GEN_INT (current_frame_info.n_local_regs),
3588 GEN_INT (current_frame_info.n_output_regs),
3589 GEN_INT (current_frame_info.n_rotate_regs)));
3590 if (current_frame_info.r[reg_save_ar_pfs])
3592 RTX_FRAME_RELATED_P (insn) = 1;
3593 add_reg_note (insn, REG_CFA_REGISTER,
3594 gen_rtx_SET (ar_pfs_save_reg,
3595 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3599 /* Set up frame pointer, stack pointer, and spill iterators. */
3601 n_varargs = cfun->machine->n_varargs;
3602 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3603 stack_pointer_rtx, 0);
3605 if (frame_pointer_needed)
3607 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3608 RTX_FRAME_RELATED_P (insn) = 1;
3610 /* Force the unwind info to recognize this as defining a new CFA,
3611 rather than some temp register setup. */
3612 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3615 if (current_frame_info.total_size != 0)
3617 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3618 rtx offset;
3620 if (satisfies_constraint_I (frame_size_rtx))
3621 offset = frame_size_rtx;
3622 else
3624 regno = next_scratch_gr_reg ();
3625 offset = gen_rtx_REG (DImode, regno);
3626 emit_move_insn (offset, frame_size_rtx);
3629 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3630 stack_pointer_rtx, offset));
3632 if (! frame_pointer_needed)
3634 RTX_FRAME_RELATED_P (insn) = 1;
3635 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3636 gen_rtx_SET (stack_pointer_rtx,
3637 gen_rtx_PLUS (DImode,
3638 stack_pointer_rtx,
3639 frame_size_rtx)));
3642 /* ??? At this point we must generate a magic insn that appears to
3643 modify the stack pointer, the frame pointer, and all spill
3644 iterators. This would allow the most scheduling freedom. For
3645 now, just hard stop. */
3646 emit_insn (gen_blockage ());
3649 /* Must copy out ar.unat before doing any integer spills. */
3650 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3652 if (current_frame_info.r[reg_save_ar_unat])
3654 ar_unat_save_reg
3655 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3656 reg_emitted (reg_save_ar_unat);
3658 else
3660 alt_regno = next_scratch_gr_reg ();
3661 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3662 current_frame_info.gr_used_mask |= 1 << alt_regno;
3665 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3666 insn = emit_move_insn (ar_unat_save_reg, reg);
3667 if (current_frame_info.r[reg_save_ar_unat])
3669 RTX_FRAME_RELATED_P (insn) = 1;
3670 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3673 /* Even if we're not going to generate an epilogue, we still
3674 need to save the register so that EH works. */
3675 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3676 emit_insn (gen_prologue_use (ar_unat_save_reg));
3678 else
3679 ar_unat_save_reg = NULL_RTX;
3681 /* Spill all varargs registers. Do this before spilling any GR registers,
3682 since we want the UNAT bits for the GR registers to override the UNAT
3683 bits from varargs, which we don't care about. */
3685 cfa_off = -16;
3686 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3688 reg = gen_rtx_REG (DImode, regno);
3689 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3692 /* Locate the bottom of the register save area. */
3693 cfa_off = (current_frame_info.spill_cfa_off
3694 + current_frame_info.spill_size
3695 + current_frame_info.extra_spill_size);
3697 /* Save the predicate register block either in a register or in memory. */
3698 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3700 reg = gen_rtx_REG (DImode, PR_REG (0));
3701 if (current_frame_info.r[reg_save_pr] != 0)
3703 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3704 reg_emitted (reg_save_pr);
3705 insn = emit_move_insn (alt_reg, reg);
3707 /* ??? Denote pr spill/fill by a DImode move that modifies all
3708 64 hard registers. */
3709 RTX_FRAME_RELATED_P (insn) = 1;
3710 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3712 /* Even if we're not going to generate an epilogue, we still
3713 need to save the register so that EH works. */
3714 if (! epilogue_p)
3715 emit_insn (gen_prologue_use (alt_reg));
3717 else
3719 alt_regno = next_scratch_gr_reg ();
3720 alt_reg = gen_rtx_REG (DImode, alt_regno);
3721 insn = emit_move_insn (alt_reg, reg);
3722 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3723 cfa_off -= 8;
3727 /* Handle AR regs in numerical order. All of them get special handling. */
3728 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3729 && current_frame_info.r[reg_save_ar_unat] == 0)
3731 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3732 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3733 cfa_off -= 8;
3736 /* The alloc insn already copied ar.pfs into a general register. The
3737 only thing we have to do now is copy that register to a stack slot
3738 if we'd not allocated a local register for the job. */
3739 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3740 && current_frame_info.r[reg_save_ar_pfs] == 0)
3742 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3743 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3744 cfa_off -= 8;
3747 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3749 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3750 if (current_frame_info.r[reg_save_ar_lc] != 0)
3752 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3753 reg_emitted (reg_save_ar_lc);
3754 insn = emit_move_insn (alt_reg, reg);
3755 RTX_FRAME_RELATED_P (insn) = 1;
3756 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3758 /* Even if we're not going to generate an epilogue, we still
3759 need to save the register so that EH works. */
3760 if (! epilogue_p)
3761 emit_insn (gen_prologue_use (alt_reg));
3763 else
3765 alt_regno = next_scratch_gr_reg ();
3766 alt_reg = gen_rtx_REG (DImode, alt_regno);
3767 emit_move_insn (alt_reg, reg);
3768 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3769 cfa_off -= 8;
3773 /* Save the return pointer. */
3774 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3776 reg = gen_rtx_REG (DImode, BR_REG (0));
3777 if (current_frame_info.r[reg_save_b0] != 0)
3779 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3780 reg_emitted (reg_save_b0);
3781 insn = emit_move_insn (alt_reg, reg);
3782 RTX_FRAME_RELATED_P (insn) = 1;
3783 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3785 /* Even if we're not going to generate an epilogue, we still
3786 need to save the register so that EH works. */
3787 if (! epilogue_p)
3788 emit_insn (gen_prologue_use (alt_reg));
3790 else
3792 alt_regno = next_scratch_gr_reg ();
3793 alt_reg = gen_rtx_REG (DImode, alt_regno);
3794 emit_move_insn (alt_reg, reg);
3795 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3796 cfa_off -= 8;
3800 if (current_frame_info.r[reg_save_gp])
3802 reg_emitted (reg_save_gp);
3803 insn = emit_move_insn (gen_rtx_REG (DImode,
3804 current_frame_info.r[reg_save_gp]),
3805 pic_offset_table_rtx);
3808 /* We should now be at the base of the gr/br/fr spill area. */
3809 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3810 + current_frame_info.spill_size));
3812 /* Spill all general registers. */
3813 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3814 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3816 reg = gen_rtx_REG (DImode, regno);
3817 do_spill (gen_gr_spill, reg, cfa_off, reg);
3818 cfa_off -= 8;
3821 /* Spill the rest of the BR registers. */
3822 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3823 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3825 alt_regno = next_scratch_gr_reg ();
3826 alt_reg = gen_rtx_REG (DImode, alt_regno);
3827 reg = gen_rtx_REG (DImode, regno);
3828 emit_move_insn (alt_reg, reg);
3829 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3830 cfa_off -= 8;
3833 /* Align the frame and spill all FR registers. */
3834 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3835 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3837 gcc_assert (!(cfa_off & 15));
3838 reg = gen_rtx_REG (XFmode, regno);
3839 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3840 cfa_off -= 16;
3843 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3845 finish_spill_pointers ();
3848 /* Output the textual info surrounding the prologue. */
3850 void
3851 ia64_start_function (FILE *file, const char *fnname,
3852 tree decl ATTRIBUTE_UNUSED)
3854 #if TARGET_ABI_OPEN_VMS
3855 vms_start_function (fnname);
3856 #endif
3858 fputs ("\t.proc ", file);
3859 assemble_name (file, fnname);
3860 fputc ('\n', file);
3861 ASM_OUTPUT_LABEL (file, fnname);
3864 /* Called after register allocation to add any instructions needed for the
3865 epilogue. Using an epilogue insn is favored compared to putting all of the
3866 instructions in output_function_epilogue (), since it allows the scheduler
3867 to intermix instructions with the restores of the saved registers. In
3868 some cases, it might be necessary to emit a barrier instruction as the last
3869 insn to prevent such scheduling. */
3871 void
3872 ia64_expand_epilogue (int sibcall_p)
3874 rtx_insn *insn;
3875 rtx reg, alt_reg, ar_unat_save_reg;
3876 int regno, alt_regno, cfa_off;
3878 ia64_compute_frame_size (get_frame_size ());
3880 /* If there is a frame pointer, then we use it instead of the stack
3881 pointer, so that the stack pointer does not need to be valid when
3882 the epilogue starts. See EXIT_IGNORE_STACK. */
3883 if (frame_pointer_needed)
3884 setup_spill_pointers (current_frame_info.n_spilled,
3885 hard_frame_pointer_rtx, 0);
3886 else
3887 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3888 current_frame_info.total_size);
3890 if (current_frame_info.total_size != 0)
3892 /* ??? At this point we must generate a magic insn that appears to
3893 modify the spill iterators and the frame pointer. This would
3894 allow the most scheduling freedom. For now, just hard stop. */
3895 emit_insn (gen_blockage ());
3898 /* Locate the bottom of the register save area. */
3899 cfa_off = (current_frame_info.spill_cfa_off
3900 + current_frame_info.spill_size
3901 + current_frame_info.extra_spill_size);
3903 /* Restore the predicate registers. */
3904 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3906 if (current_frame_info.r[reg_save_pr] != 0)
3908 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3909 reg_emitted (reg_save_pr);
3911 else
3913 alt_regno = next_scratch_gr_reg ();
3914 alt_reg = gen_rtx_REG (DImode, alt_regno);
3915 do_restore (gen_movdi_x, alt_reg, cfa_off);
3916 cfa_off -= 8;
3918 reg = gen_rtx_REG (DImode, PR_REG (0));
3919 emit_move_insn (reg, alt_reg);
3922 /* Restore the application registers. */
3924 /* Load the saved unat from the stack, but do not restore it until
3925 after the GRs have been restored. */
3926 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3928 if (current_frame_info.r[reg_save_ar_unat] != 0)
3930 ar_unat_save_reg
3931 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3932 reg_emitted (reg_save_ar_unat);
3934 else
3936 alt_regno = next_scratch_gr_reg ();
3937 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3938 current_frame_info.gr_used_mask |= 1 << alt_regno;
3939 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3940 cfa_off -= 8;
3943 else
3944 ar_unat_save_reg = NULL_RTX;
3946 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3948 reg_emitted (reg_save_ar_pfs);
3949 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3950 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3951 emit_move_insn (reg, alt_reg);
3953 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3955 alt_regno = next_scratch_gr_reg ();
3956 alt_reg = gen_rtx_REG (DImode, alt_regno);
3957 do_restore (gen_movdi_x, alt_reg, cfa_off);
3958 cfa_off -= 8;
3959 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3960 emit_move_insn (reg, alt_reg);
3963 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3965 if (current_frame_info.r[reg_save_ar_lc] != 0)
3967 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3968 reg_emitted (reg_save_ar_lc);
3970 else
3972 alt_regno = next_scratch_gr_reg ();
3973 alt_reg = gen_rtx_REG (DImode, alt_regno);
3974 do_restore (gen_movdi_x, alt_reg, cfa_off);
3975 cfa_off -= 8;
3977 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3978 emit_move_insn (reg, alt_reg);
3981 /* Restore the return pointer. */
3982 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3984 if (current_frame_info.r[reg_save_b0] != 0)
3986 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3987 reg_emitted (reg_save_b0);
3989 else
3991 alt_regno = next_scratch_gr_reg ();
3992 alt_reg = gen_rtx_REG (DImode, alt_regno);
3993 do_restore (gen_movdi_x, alt_reg, cfa_off);
3994 cfa_off -= 8;
3996 reg = gen_rtx_REG (DImode, BR_REG (0));
3997 emit_move_insn (reg, alt_reg);
4000 /* We should now be at the base of the gr/br/fr spill area. */
4001 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4002 + current_frame_info.spill_size));
4004 /* The GP may be stored on the stack in the prologue, but it's
4005 never restored in the epilogue. Skip the stack slot. */
4006 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4007 cfa_off -= 8;
4009 /* Restore all general registers. */
4010 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4011 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4013 reg = gen_rtx_REG (DImode, regno);
4014 do_restore (gen_gr_restore, reg, cfa_off);
4015 cfa_off -= 8;
4018 /* Restore the branch registers. */
4019 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4020 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4022 alt_regno = next_scratch_gr_reg ();
4023 alt_reg = gen_rtx_REG (DImode, alt_regno);
4024 do_restore (gen_movdi_x, alt_reg, cfa_off);
4025 cfa_off -= 8;
4026 reg = gen_rtx_REG (DImode, regno);
4027 emit_move_insn (reg, alt_reg);
4030 /* Restore floating point registers. */
4031 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4032 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4034 gcc_assert (!(cfa_off & 15));
4035 reg = gen_rtx_REG (XFmode, regno);
4036 do_restore (gen_fr_restore_x, reg, cfa_off);
4037 cfa_off -= 16;
4040 /* Restore ar.unat for real. */
4041 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4043 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4044 emit_move_insn (reg, ar_unat_save_reg);
4047 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4049 finish_spill_pointers ();
4051 if (current_frame_info.total_size
4052 || cfun->machine->ia64_eh_epilogue_sp
4053 || frame_pointer_needed)
4055 /* ??? At this point we must generate a magic insn that appears to
4056 modify the spill iterators, the stack pointer, and the frame
4057 pointer. This would allow the most scheduling freedom. For now,
4058 just hard stop. */
4059 emit_insn (gen_blockage ());
4062 if (cfun->machine->ia64_eh_epilogue_sp)
4063 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4064 else if (frame_pointer_needed)
4066 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4067 RTX_FRAME_RELATED_P (insn) = 1;
4068 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4070 else if (current_frame_info.total_size)
4072 rtx offset, frame_size_rtx;
4074 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4075 if (satisfies_constraint_I (frame_size_rtx))
4076 offset = frame_size_rtx;
4077 else
4079 regno = next_scratch_gr_reg ();
4080 offset = gen_rtx_REG (DImode, regno);
4081 emit_move_insn (offset, frame_size_rtx);
4084 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4085 offset));
4087 RTX_FRAME_RELATED_P (insn) = 1;
4088 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4089 gen_rtx_SET (stack_pointer_rtx,
4090 gen_rtx_PLUS (DImode,
4091 stack_pointer_rtx,
4092 frame_size_rtx)));
4095 if (cfun->machine->ia64_eh_epilogue_bsp)
4096 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4098 if (! sibcall_p)
4099 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4100 else
4102 int fp = GR_REG (2);
4103 /* We need a throwaway register here; r0 and r1 are reserved,
4104 so r2 is the first available call-clobbered register. If
4105 there was a frame_pointer register, we may have swapped the
4106 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4107 sure we're using the string "r2" when emitting the register
4108 name for the assembler. */
4109 if (current_frame_info.r[reg_fp]
4110 && current_frame_info.r[reg_fp] == GR_REG (2))
4111 fp = HARD_FRAME_POINTER_REGNUM;
4113 /* We must emit an alloc to force the input registers to become output
4114 registers. Otherwise, if the callee tries to pass its parameters
4115 through to another call without an intervening alloc, then these
4116 values get lost. */
4117 /* ??? We don't need to preserve all input registers. We only need to
4118 preserve those input registers used as arguments to the sibling call.
4119 It is unclear how to compute that number here. */
4120 if (current_frame_info.n_input_regs != 0)
4122 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4124 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4125 const0_rtx, const0_rtx,
4126 n_inputs, const0_rtx));
4127 RTX_FRAME_RELATED_P (insn) = 1;
4129 /* ??? We need to mark the alloc as frame-related so that it gets
4130 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4131 But there's nothing dwarf2 related to be done wrt the register
4132 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4133 the empty parallel means dwarf2out will not see anything. */
4134 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4135 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4140 /* Return 1 if br.ret can do all the work required to return from a
4141 function. */
4143 int
4144 ia64_direct_return (void)
4146 if (reload_completed && ! frame_pointer_needed)
4148 ia64_compute_frame_size (get_frame_size ());
4150 return (current_frame_info.total_size == 0
4151 && current_frame_info.n_spilled == 0
4152 && current_frame_info.r[reg_save_b0] == 0
4153 && current_frame_info.r[reg_save_pr] == 0
4154 && current_frame_info.r[reg_save_ar_pfs] == 0
4155 && current_frame_info.r[reg_save_ar_unat] == 0
4156 && current_frame_info.r[reg_save_ar_lc] == 0);
4158 return 0;
4161 /* Return the magic cookie that we use to hold the return address
4162 during early compilation. */
4164 rtx
4165 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4167 if (count != 0)
4168 return NULL;
4169 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4172 /* Split this value after reload, now that we know where the return
4173 address is saved. */
4175 void
4176 ia64_split_return_addr_rtx (rtx dest)
4178 rtx src;
4180 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4182 if (current_frame_info.r[reg_save_b0] != 0)
4184 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4185 reg_emitted (reg_save_b0);
4187 else
4189 HOST_WIDE_INT off;
4190 unsigned int regno;
4191 rtx off_r;
4193 /* Compute offset from CFA for BR0. */
4194 /* ??? Must be kept in sync with ia64_expand_prologue. */
4195 off = (current_frame_info.spill_cfa_off
4196 + current_frame_info.spill_size);
4197 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4198 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4199 off -= 8;
4201 /* Convert CFA offset to a register based offset. */
4202 if (frame_pointer_needed)
4203 src = hard_frame_pointer_rtx;
4204 else
4206 src = stack_pointer_rtx;
4207 off += current_frame_info.total_size;
4210 /* Load address into scratch register. */
4211 off_r = GEN_INT (off);
4212 if (satisfies_constraint_I (off_r))
4213 emit_insn (gen_adddi3 (dest, src, off_r));
4214 else
4216 emit_move_insn (dest, off_r);
4217 emit_insn (gen_adddi3 (dest, src, dest));
4220 src = gen_rtx_MEM (Pmode, dest);
4223 else
4224 src = gen_rtx_REG (DImode, BR_REG (0));
4226 emit_move_insn (dest, src);
4229 int
4230 ia64_hard_regno_rename_ok (int from, int to)
4232 /* Don't clobber any of the registers we reserved for the prologue. */
4233 unsigned int r;
4235 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4236 if (to == current_frame_info.r[r]
4237 || from == current_frame_info.r[r]
4238 || to == emitted_frame_related_regs[r]
4239 || from == emitted_frame_related_regs[r])
4240 return 0;
4242 /* Don't use output registers outside the register frame. */
4243 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4244 return 0;
4246 /* Retain even/oddness on predicate register pairs. */
4247 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4248 return (from & 1) == (to & 1);
4250 return 1;
4253 /* Target hook for assembling integer objects. Handle word-sized
4254 aligned objects and detect the cases when @fptr is needed. */
4256 static bool
4257 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4259 if (size == POINTER_SIZE / BITS_PER_UNIT
4260 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4261 && GET_CODE (x) == SYMBOL_REF
4262 && SYMBOL_REF_FUNCTION_P (x))
4264 static const char * const directive[2][2] = {
4265 /* 64-bit pointer */ /* 32-bit pointer */
4266 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4267 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4269 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4270 output_addr_const (asm_out_file, x);
4271 fputs (")\n", asm_out_file);
4272 return true;
4274 return default_assemble_integer (x, size, aligned_p);
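/* Illustrative example (hypothetical symbol "foo"): an aligned 64-bit
   function pointer is emitted as "data8 @fptr(foo)", while an unaligned
   32-bit one comes out as "data4.ua @fptr(foo)", per the table above.  */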
4277 /* Emit the function prologue. */
4279 static void
4280 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4282 int mask, grsave, grsave_prev;
4284 if (current_frame_info.need_regstk)
4285 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4286 current_frame_info.n_input_regs,
4287 current_frame_info.n_local_regs,
4288 current_frame_info.n_output_regs,
4289 current_frame_info.n_rotate_regs);
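/* Illustrative example: a frame using 2 input, 3 local, and 1 output
   register with no rotating registers gets ".regstk 2, 3, 1, 0".  */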
4291 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4292 return;
4294 /* Emit the .prologue directive. */
4296 mask = 0;
4297 grsave = grsave_prev = 0;
4298 if (current_frame_info.r[reg_save_b0] != 0)
4300 mask |= 8;
4301 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4303 if (current_frame_info.r[reg_save_ar_pfs] != 0
4304 && (grsave_prev == 0
4305 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4307 mask |= 4;
4308 if (grsave_prev == 0)
4309 grsave = current_frame_info.r[reg_save_ar_pfs];
4310 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4312 if (current_frame_info.r[reg_fp] != 0
4313 && (grsave_prev == 0
4314 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4316 mask |= 2;
4317 if (grsave_prev == 0)
4318 grsave = HARD_FRAME_POINTER_REGNUM;
4319 grsave_prev = current_frame_info.r[reg_fp];
4321 if (current_frame_info.r[reg_save_pr] != 0
4322 && (grsave_prev == 0
4323 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4325 mask |= 1;
4326 if (grsave_prev == 0)
4327 grsave = current_frame_info.r[reg_save_pr];
4330 if (mask && TARGET_GNU_AS)
4331 fprintf (file, "\t.prologue %d, %d\n", mask,
4332 ia64_dbx_register_number (grsave));
4333 else
4334 fputs ("\t.prologue\n", file);
4336 /* Emit a .spill directive, if necessary, to relocate the base of
4337 the register spill area. */
4338 if (current_frame_info.spill_cfa_off != -16)
4339 fprintf (file, "\t.spill %ld\n",
4340 (long) (current_frame_info.spill_cfa_off
4341 + current_frame_info.spill_size));
4344 /* Emit the .body directive at the scheduled end of the prologue. */
4346 static void
4347 ia64_output_function_end_prologue (FILE *file)
4349 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4350 return;
4352 fputs ("\t.body\n", file);
4355 /* Emit the function epilogue. */
4357 static void
4358 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4359 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4361 int i;
4363 if (current_frame_info.r[reg_fp])
4365 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4366 reg_names[HARD_FRAME_POINTER_REGNUM]
4367 = reg_names[current_frame_info.r[reg_fp]];
4368 reg_names[current_frame_info.r[reg_fp]] = tmp;
4369 reg_emitted (reg_fp);
4371 if (! TARGET_REG_NAMES)
4373 for (i = 0; i < current_frame_info.n_input_regs; i++)
4374 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4375 for (i = 0; i < current_frame_info.n_local_regs; i++)
4376 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4377 for (i = 0; i < current_frame_info.n_output_regs; i++)
4378 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4381 current_frame_info.initialized = 0;
4384 int
4385 ia64_dbx_register_number (int regno)
4387 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4388 from its home at loc79 to something inside the register frame. We
4389 must perform the same renumbering here for the debug info. */
4390 if (current_frame_info.r[reg_fp])
4392 if (regno == HARD_FRAME_POINTER_REGNUM)
4393 regno = current_frame_info.r[reg_fp];
4394 else if (regno == current_frame_info.r[reg_fp])
4395 regno = HARD_FRAME_POINTER_REGNUM;
4398 if (IN_REGNO_P (regno))
4399 return 32 + regno - IN_REG (0);
4400 else if (LOC_REGNO_P (regno))
4401 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4402 else if (OUT_REGNO_P (regno))
4403 return (32 + current_frame_info.n_input_regs
4404 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4405 else
4406 return regno;
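/* Worked example (illustrative): with 2 input and 3 local registers, the
   formulas above map IN_REG (1) to debug register 33, LOC_REG (0) to
   34 (32 + n_input_regs), and OUT_REG (0) to 37.  */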
4409 /* Implement TARGET_TRAMPOLINE_INIT.
4411 The trampoline should set the static chain pointer to the value placed
4412 into the trampoline and should branch to the specified routine.
4413 To make the normal indirect-subroutine calling convention work,
4414 the trampoline must look like a function descriptor; the first
4415 word being the target address and the second being the target's
4416 global pointer.
4418 We abuse the concept of a global pointer by arranging for it
4419 to point to the data we need to load. The complete trampoline
4420 has the following form:
4422 +-------------------+ \
4423 TRAMP: | __ia64_trampoline | |
4424 +-------------------+ > fake function descriptor
4425 | TRAMP+16 | |
4426 +-------------------+ /
4427 | target descriptor |
4428 +-------------------+
4429 | static link |
4430 +-------------------+
4433 static void
4434 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4436 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4437 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4439 /* The Intel assembler requires that the global __ia64_trampoline symbol
4440 be declared explicitly */
4441 if (!TARGET_GNU_AS)
4443 static bool declared_ia64_trampoline = false;
4445 if (!declared_ia64_trampoline)
4447 declared_ia64_trampoline = true;
4448 (*targetm.asm_out.globalize_label) (asm_out_file,
4449 "__ia64_trampoline");
4453 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4454 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4455 fnaddr = convert_memory_address (Pmode, fnaddr);
4456 static_chain = convert_memory_address (Pmode, static_chain);
4458 /* Load up our iterator. */
4459 addr_reg = copy_to_reg (addr);
4460 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4462 /* The first two words are the fake descriptor:
4463 __ia64_trampoline, ADDR+16. */
4464 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4465 if (TARGET_ABI_OPEN_VMS)
4467 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4468 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4469 relocation against function symbols to make it identical to the
4470 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4471 strict ELF and dereference to get the bare code address. */
4472 rtx reg = gen_reg_rtx (Pmode);
4473 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4474 emit_move_insn (reg, tramp);
4475 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4476 tramp = reg;
4478 emit_move_insn (m_tramp, tramp);
4479 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4480 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4482 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4483 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4484 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4486 /* The third word is the target descriptor. */
4487 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4488 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4489 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4491 /* The fourth word is the static chain. */
4492 emit_move_insn (m_tramp, static_chain);
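/* Illustrative summary of the stores above: the four 8-byte words written
   at TRAMP+0, +8, +16 and +24 hold the address of __ia64_trampoline, the
   value TRAMP+16, the target's function descriptor, and the static chain,
   matching the layout pictured before this function.  */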
4495 /* Do any needed setup for a variadic function. CUM has not been updated
4496 for the last named argument which has type TYPE and mode MODE.
4498 We generate the actual spill instructions during prologue generation. */
4500 static void
4501 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4502 tree type, int * pretend_size,
4503 int second_time ATTRIBUTE_UNUSED)
4505 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4507 /* Skip the current argument. */
4508 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4510 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4512 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4513 *pretend_size = n * UNITS_PER_WORD;
4514 cfun->machine->n_varargs = n;
4518 /* Check whether TYPE is a homogeneous floating point aggregate. If
4519 it is, return the mode of the floating point type that appears
4520 in all leaves. If it is not, return VOIDmode.
4522 An aggregate is a homogeneous floating point aggregate if all
4523 fields/elements in it have the same floating point type (e.g.,
4524 SFmode). 128-bit quad-precision floats are excluded.
4526 Variable sized aggregates should never arrive here, since we should
4527 have already decided to pass them by reference. Top-level zero-sized
4528 aggregates are excluded because our parallels crash the middle-end. */
4530 static machine_mode
4531 hfa_element_mode (const_tree type, bool nested)
4533 machine_mode element_mode = VOIDmode;
4534 machine_mode mode;
4535 enum tree_code code = TREE_CODE (type);
4536 int know_element_mode = 0;
4537 tree t;
4539 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4540 return VOIDmode;
4542 switch (code)
4544 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4545 case BOOLEAN_TYPE: case POINTER_TYPE:
4546 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4547 case LANG_TYPE: case FUNCTION_TYPE:
4548 return VOIDmode;
4550 /* Fortran complex types are supposed to be HFAs, so we need to handle
4551 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4552 types though. */
4553 case COMPLEX_TYPE:
4554 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4555 && TYPE_MODE (type) != TCmode)
4556 return GET_MODE_INNER (TYPE_MODE (type));
4557 else
4558 return VOIDmode;
4560 case REAL_TYPE:
4561 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4562 mode if this is contained within an aggregate. */
4563 if (nested && TYPE_MODE (type) != TFmode)
4564 return TYPE_MODE (type);
4565 else
4566 return VOIDmode;
4568 case ARRAY_TYPE:
4569 return hfa_element_mode (TREE_TYPE (type), 1);
4571 case RECORD_TYPE:
4572 case UNION_TYPE:
4573 case QUAL_UNION_TYPE:
4574 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4576 if (TREE_CODE (t) != FIELD_DECL)
4577 continue;
4579 mode = hfa_element_mode (TREE_TYPE (t), 1);
4580 if (know_element_mode)
4582 if (mode != element_mode)
4583 return VOIDmode;
4585 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4586 return VOIDmode;
4587 else
4589 know_element_mode = 1;
4590 element_mode = mode;
4593 return element_mode;
4595 default:
4596 /* If we reach here, we probably have some front-end specific type
4597 that the backend doesn't know about. This can happen via the
4598 aggregate_value_p call in init_function_start. All we can do is
4599 ignore unknown tree types. */
4600 return VOIDmode;
4603 return VOIDmode;
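/* Illustrative examples: struct { float x, y, z; } yields SFmode and
   struct { double d[4]; } yields DFmode, while struct { float f; double d; }
   (mixed element modes) or any aggregate containing a 128-bit quad
   (TFmode) field yields VOIDmode.  */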
4606 /* Return the number of words required to hold a quantity of TYPE and MODE
4607 when passed as an argument. */
4608 static int
4609 ia64_function_arg_words (const_tree type, machine_mode mode)
4611 int words;
4613 if (mode == BLKmode)
4614 words = int_size_in_bytes (type);
4615 else
4616 words = GET_MODE_SIZE (mode);
4618 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
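/* Example (assuming the usual 8-byte UNITS_PER_WORD): a 12-byte BLKmode
   aggregate needs (12 + 7) / 8 = 2 argument words; a DImode scalar needs 1.  */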
4621 /* Return the number of registers that should be skipped so the current
4622 argument (described by TYPE and WORDS) will be properly aligned.
4624 Integer and float arguments larger than 8 bytes start at the next
4625 even boundary. Aggregates larger than 8 bytes start at the next
4626 even boundary if the aggregate has 16 byte alignment. Note that
4627 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4628 but are still to be aligned in registers.
4630 ??? The ABI does not specify how to handle aggregates with
4631 alignment from 9 to 15 bytes, or greater than 16. We handle them
4632 all as if they had 16 byte alignment. Such aggregates can occur
4633 only if gcc extensions are used. */
4634 static int
4635 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4636 const_tree type, int words)
4638 /* No registers are skipped on VMS. */
4639 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4640 return 0;
4642 if (type
4643 && TREE_CODE (type) != INTEGER_TYPE
4644 && TREE_CODE (type) != REAL_TYPE)
4645 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4646 else
4647 return words > 1;
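/* Illustrative example: with one argument slot already used (cum->words
   odd), a 16-byte-aligned aggregate or a 16-byte integer skips one slot so
   it starts on an even register, while a 4-byte int does not.  */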
4650 /* Return rtx for register where argument is passed, or zero if it is passed
4651 on the stack. */
4652 /* ??? 128-bit quad-precision floats are always passed in general
4653 registers. */
4655 static rtx
4656 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4657 const_tree type, bool named, bool incoming)
4659 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4661 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4662 int words = ia64_function_arg_words (type, mode);
4663 int offset = ia64_function_arg_offset (cum, type, words);
4664 machine_mode hfa_mode = VOIDmode;
4666 /* For OpenVMS, emit the instruction setting up the argument register here,
4667 when we know this will be together with the other arguments setup related
4668 insns. This is not the conceptually best place to do this, but this is
4669 the easiest as we have convenient access to cumulative args info. */
4671 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4672 && named == 1)
4674 unsigned HOST_WIDE_INT regval = cum->words;
4675 int i;
4677 for (i = 0; i < 8; i++)
4678 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4680 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4681 GEN_INT (regval));
4684 /* If all argument slots are used, then it must go on the stack. */
4685 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4686 return 0;
4688 /* On OpenVMS an argument is either in Rn or Fn. */
4689 if (TARGET_ABI_OPEN_VMS)
4691 if (FLOAT_MODE_P (mode))
4692 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4693 else
4694 return gen_rtx_REG (mode, basereg + cum->words);
4697 /* Check for and handle homogeneous FP aggregates. */
4698 if (type)
4699 hfa_mode = hfa_element_mode (type, 0);
4701 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4702 and unprototyped hfas are passed specially. */
4703 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4705 rtx loc[16];
4706 int i = 0;
4707 int fp_regs = cum->fp_regs;
4708 int int_regs = cum->words + offset;
4709 int hfa_size = GET_MODE_SIZE (hfa_mode);
4710 int byte_size;
4711 int args_byte_size;
4713 /* If prototyped, pass it in FR regs then GR regs.
4714 If not prototyped, pass it in both FR and GR regs.
4716 If this is an SFmode aggregate, then it is possible to run out of
4717 FR regs while GR regs are still left. In that case, we pass the
4718 remaining part in the GR regs. */
4720 /* Fill the FP regs. We do this always. We stop if we reach the end
4721 of the argument, the last FP register, or the last argument slot. */
4723 byte_size = ((mode == BLKmode)
4724 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4725 args_byte_size = int_regs * UNITS_PER_WORD;
4726 offset = 0;
4727 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4728 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4730 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4731 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4732 + fp_regs)),
4733 GEN_INT (offset));
4734 offset += hfa_size;
4735 args_byte_size += hfa_size;
4736 fp_regs++;
4739 /* If no prototype, then the whole thing must go in GR regs. */
4740 if (! cum->prototype)
4741 offset = 0;
4742 /* If this is an SFmode aggregate, then we might have some left over
4743 that needs to go in GR regs. */
4744 else if (byte_size != offset)
4745 int_regs += offset / UNITS_PER_WORD;
4747 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4749 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4751 machine_mode gr_mode = DImode;
4752 unsigned int gr_size;
4754 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4755 then this goes in a GR reg left adjusted/little endian, right
4756 adjusted/big endian. */
4757 /* ??? Currently this is handled wrong, because 4-byte hunks are
4758 always right adjusted/little endian. */
4759 if (offset & 0x4)
4760 gr_mode = SImode;
4761 /* If we have an even 4 byte hunk because the aggregate is a
4762 multiple of 4 bytes in size, then this goes in a GR reg right
4763 adjusted/little endian. */
4764 else if (byte_size - offset == 4)
4765 gr_mode = SImode;
4767 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4768 gen_rtx_REG (gr_mode, (basereg
4769 + int_regs)),
4770 GEN_INT (offset));
4772 gr_size = GET_MODE_SIZE (gr_mode);
4773 offset += gr_size;
4774 if (gr_size == UNITS_PER_WORD
4775 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4776 int_regs++;
4777 else if (gr_size > UNITS_PER_WORD)
4778 int_regs += gr_size / UNITS_PER_WORD;
4780 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4783 /* Integral and aggregates go in general registers. If we have run out of
4784 FR registers, then FP values must also go in general registers. This can
4785 happen when we have a SFmode HFA. */
4786 else if (mode == TFmode || mode == TCmode
4787 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4789 int byte_size = ((mode == BLKmode)
4790 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4791 if (BYTES_BIG_ENDIAN
4792 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4793 && byte_size < UNITS_PER_WORD
4794 && byte_size > 0)
4796 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4797 gen_rtx_REG (DImode,
4798 (basereg + cum->words
4799 + offset)),
4800 const0_rtx);
4801 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4803 else
4804 return gen_rtx_REG (mode, basereg + cum->words + offset);
4808 /* If there is a prototype, then FP values go in a FR register when
4809 named, and in a GR register when unnamed. */
4810 else if (cum->prototype)
4812 if (named)
4813 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4814 /* In big-endian mode, an anonymous SFmode value must be represented
4815 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4816 the value into the high half of the general register. */
4817 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4818 return gen_rtx_PARALLEL (mode,
4819 gen_rtvec (1,
4820 gen_rtx_EXPR_LIST (VOIDmode,
4821 gen_rtx_REG (DImode, basereg + cum->words + offset),
4822 const0_rtx)));
4823 else
4824 return gen_rtx_REG (mode, basereg + cum->words + offset);
4826 /* If there is no prototype, then FP values go in both FR and GR
4827 registers. */
4828 else
4830 /* See comment above. */
4831 machine_mode inner_mode =
4832 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4834 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4835 gen_rtx_REG (mode, (FR_ARG_FIRST
4836 + cum->fp_regs)),
4837 const0_rtx);
4838 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4839 gen_rtx_REG (inner_mode,
4840 (basereg + cum->words
4841 + offset)),
4842 const0_rtx);
4844 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4848 /* Implement TARGET_FUNCTION_ARG target hook. */
4850 static rtx
4851 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4852 const_tree type, bool named)
4854 return ia64_function_arg_1 (cum, mode, type, named, false);
4857 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4859 static rtx
4860 ia64_function_incoming_arg (cumulative_args_t cum,
4861 machine_mode mode,
4862 const_tree type, bool named)
4864 return ia64_function_arg_1 (cum, mode, type, named, true);
4867 /* Return number of bytes, at the beginning of the argument, that must be
4868 put in registers. 0 if the argument is entirely in registers or entirely
4869 in memory. */
4871 static int
4872 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4873 tree type, bool named ATTRIBUTE_UNUSED)
4875 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4877 int words = ia64_function_arg_words (type, mode);
4878 int offset = ia64_function_arg_offset (cum, type, words);
4880 /* If all argument slots are used, then it must go on the stack. */
4881 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4882 return 0;
4884 /* It doesn't matter whether the argument goes in FR or GR regs. If
4885 it fits within the 8 argument slots, then it goes entirely in
4886 registers. If it extends past the last argument slot, then the rest
4887 goes on the stack. */
4889 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4890 return 0;
4892 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
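/* Worked example (assuming MAX_ARGUMENT_SLOTS == 8 and 8-byte words):
   a 32-byte aggregate arriving with cum->words == 6 gets its first
   (8 - 6) * 8 = 16 bytes in registers and the remaining 16 on the stack.  */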
4895 /* Return ivms_arg_type based on machine_mode. */
4897 static enum ivms_arg_type
4898 ia64_arg_type (machine_mode mode)
4900 switch (mode)
4902 case SFmode:
4903 return FS;
4904 case DFmode:
4905 return FT;
4906 default:
4907 return I64;
4911 /* Update CUM to point after this argument. This is patterned after
4912 ia64_function_arg. */
4914 static void
4915 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4916 const_tree type, bool named)
4918 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4919 int words = ia64_function_arg_words (type, mode);
4920 int offset = ia64_function_arg_offset (cum, type, words);
4921 machine_mode hfa_mode = VOIDmode;
4923 /* If all arg slots are already full, then there is nothing to do. */
4924 if (cum->words >= MAX_ARGUMENT_SLOTS)
4926 cum->words += words + offset;
4927 return;
4930 cum->atypes[cum->words] = ia64_arg_type (mode);
4931 cum->words += words + offset;
4933 /* On OpenVMS an argument is either in Rn or Fn. */
4934 if (TARGET_ABI_OPEN_VMS)
4936 cum->int_regs = cum->words;
4937 cum->fp_regs = cum->words;
4938 return;
4941 /* Check for and handle homogeneous FP aggregates. */
4942 if (type)
4943 hfa_mode = hfa_element_mode (type, 0);
4945 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4946 and unprototyped hfas are passed specially. */
4947 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4949 int fp_regs = cum->fp_regs;
4950 /* This is the original value of cum->words + offset. */
4951 int int_regs = cum->words - words;
4952 int hfa_size = GET_MODE_SIZE (hfa_mode);
4953 int byte_size;
4954 int args_byte_size;
4956 /* If prototyped, pass it in FR regs then GR regs.
4957 If not prototyped, pass it in both FR and GR regs.
4959 If this is an SFmode aggregate, then it is possible to run out of
4960 FR regs while GR regs are still left. In that case, we pass the
4961 remaining part in the GR regs. */
4963 /* Fill the FP regs. We do this always. We stop if we reach the end
4964 of the argument, the last FP register, or the last argument slot. */
4966 byte_size = ((mode == BLKmode)
4967 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4968 args_byte_size = int_regs * UNITS_PER_WORD;
4969 offset = 0;
4970 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4971 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4973 offset += hfa_size;
4974 args_byte_size += hfa_size;
4975 fp_regs++;
4978 cum->fp_regs = fp_regs;
4981 /* Integral and aggregates go in general registers. So do TFmode FP values.
4982 If we have run out of FR registers, then other FP values must also go in
4983 general registers. This can happen when we have a SFmode HFA. */
4984 else if (mode == TFmode || mode == TCmode
4985 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4986 cum->int_regs = cum->words;
4988 /* If there is a prototype, then FP values go in a FR register when
4989 named, and in a GR register when unnamed. */
4990 else if (cum->prototype)
4992 if (! named)
4993 cum->int_regs = cum->words;
4994 else
4995 /* ??? Complex types should not reach here. */
4996 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4998 /* If there is no prototype, then FP values go in both FR and GR
4999 registers. */
5000 else
5002 /* ??? Complex types should not reach here. */
5003 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5004 cum->int_regs = cum->words;
5008 /* Arguments with alignment larger than 8 bytes start at the next even
5009 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5010 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5012 static unsigned int
5013 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5015 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5016 return PARM_BOUNDARY * 2;
5018 if (type)
5020 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5021 return PARM_BOUNDARY * 2;
5022 else
5023 return PARM_BOUNDARY;
5026 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5027 return PARM_BOUNDARY * 2;
5028 else
5029 return PARM_BOUNDARY;
5032 /* True if it is OK to do sibling call optimization for the specified
5033 call expression EXP. DECL will be the called function, or NULL if
5034 this is an indirect call. */
5035 static bool
5036 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5038 /* We can't perform a sibcall if the current function has the syscall_linkage
5039 attribute. */
5040 if (lookup_attribute ("syscall_linkage",
5041 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5042 return false;
5044 /* We must always return with our current GP. This means we can
5045 only sibcall to functions defined in the current module unless
5046 TARGET_CONST_GP is set to true. */
5047 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5051 /* Implement va_arg. */
5053 static tree
5054 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5055 gimple_seq *post_p)
5057 /* Variable sized types are passed by reference. */
5058 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5060 tree ptrtype = build_pointer_type (type);
5061 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5062 return build_va_arg_indirect_ref (addr);
5065 /* Aggregate arguments with alignment larger than 8 bytes start at
5066 the next even boundary. Integer and floating point arguments
5067 do so if they are larger than 8 bytes, whether or not they are
5068 also aligned larger than 8 bytes. */
5069 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5070 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5072 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5073 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5074 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5075 gimplify_assign (unshare_expr (valist), t, pre_p);
5078 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
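/* Example of the rounding above (8-byte UNITS_PER_WORD): a valist at
   offset 0x28 is advanced to 0x30 (add 15, mask with -16) before a
   16-byte-aligned argument is fetched.  */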
5081 /* Return 1 if the function return value is returned in memory. Return 0 if it is
5082 in a register. */
5084 static bool
5085 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5087 machine_mode mode;
5088 machine_mode hfa_mode;
5089 HOST_WIDE_INT byte_size;
5091 mode = TYPE_MODE (valtype);
5092 byte_size = GET_MODE_SIZE (mode);
5093 if (mode == BLKmode)
5095 byte_size = int_size_in_bytes (valtype);
5096 if (byte_size < 0)
5097 return true;
5100 /* HFAs with up to 8 elements are returned in the FP argument registers. */
5102 hfa_mode = hfa_element_mode (valtype, 0);
5103 if (hfa_mode != VOIDmode)
5105 int hfa_size = GET_MODE_SIZE (hfa_mode);
5107 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5108 return true;
5109 else
5110 return false;
5112 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5113 return true;
5114 else
5115 return false;
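/* Illustrative examples: an HFA of 10 floats (40 / 4 = 10 elements) exceeds
   the 8 argument slots and is returned in memory, while an HFA of 8 doubles
   (64 / 8 = 8) is returned in the FP argument registers.  */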
5118 /* Return rtx for register that holds the function return value. */
5120 static rtx
5121 ia64_function_value (const_tree valtype,
5122 const_tree fn_decl_or_type,
5123 bool outgoing ATTRIBUTE_UNUSED)
5125 machine_mode mode;
5126 machine_mode hfa_mode;
5127 int unsignedp;
5128 const_tree func = fn_decl_or_type;
5130 if (fn_decl_or_type
5131 && !DECL_P (fn_decl_or_type))
5132 func = NULL;
5134 mode = TYPE_MODE (valtype);
5135 hfa_mode = hfa_element_mode (valtype, 0);
5137 if (hfa_mode != VOIDmode)
5139 rtx loc[8];
5140 int i;
5141 int hfa_size;
5142 int byte_size;
5143 int offset;
5145 hfa_size = GET_MODE_SIZE (hfa_mode);
5146 byte_size = ((mode == BLKmode)
5147 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5148 offset = 0;
5149 for (i = 0; offset < byte_size; i++)
5151 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5152 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5153 GEN_INT (offset));
5154 offset += hfa_size;
5156 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5158 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5159 return gen_rtx_REG (mode, FR_ARG_FIRST);
5160 else
5162 bool need_parallel = false;
5164 /* In big-endian mode, we need to manage the layout of aggregates
5165 in the registers so that we get the bits properly aligned in
5166 the highpart of the registers. */
5167 if (BYTES_BIG_ENDIAN
5168 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5169 need_parallel = true;
5171 /* Something like struct S { long double x; char a[0] } is not an
5172 HFA structure, and therefore doesn't go in fp registers. But
5173 the middle-end will give it XFmode anyway, and XFmode values
5174 don't normally fit in integer registers. So we need to smuggle
5175 the value inside a parallel. */
5176 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5177 need_parallel = true;
5179 if (need_parallel)
5181 rtx loc[8];
5182 int offset;
5183 int bytesize;
5184 int i;
5186 offset = 0;
5187 bytesize = int_size_in_bytes (valtype);
5188 /* An empty PARALLEL is invalid here, but the return value
5189 doesn't matter for empty structs. */
5190 if (bytesize == 0)
5191 return gen_rtx_REG (mode, GR_RET_FIRST);
5192 for (i = 0; offset < bytesize; i++)
5194 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5195 gen_rtx_REG (DImode,
5196 GR_RET_FIRST + i),
5197 GEN_INT (offset));
5198 offset += UNITS_PER_WORD;
5200 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5203 mode = promote_function_mode (valtype, mode, &unsignedp,
5204 func ? TREE_TYPE (func) : NULL_TREE,
5205 true);
5207 return gen_rtx_REG (mode, GR_RET_FIRST);
5211 /* Worker function for TARGET_LIBCALL_VALUE. */
5213 static rtx
5214 ia64_libcall_value (machine_mode mode,
5215 const_rtx fun ATTRIBUTE_UNUSED)
5217 return gen_rtx_REG (mode,
5218 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5219 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5220 && (mode) != TFmode)
5221 ? FR_RET_FIRST : GR_RET_FIRST));
5224 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5226 static bool
5227 ia64_function_value_regno_p (const unsigned int regno)
5229 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5230 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5233 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5234 We need to emit DTP-relative relocations. */
5236 static void
5237 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5239 gcc_assert (size == 4 || size == 8);
5240 if (size == 4)
5241 fputs ("\tdata4.ua\t@dtprel(", file);
5242 else
5243 fputs ("\tdata8.ua\t@dtprel(", file);
5244 output_addr_const (file, x);
5245 fputs (")", file);
5248 /* Print a memory address as an operand to reference that memory location. */
5250 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5251 also call this from ia64_print_operand for memory addresses. */
5253 static void
5254 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5255 machine_mode /*mode*/,
5256 rtx address ATTRIBUTE_UNUSED)
5260 /* Print an operand to an assembler instruction.
5261 C Swap and print a comparison operator.
5262 D Print an FP comparison operator.
5263 E Print 32 - constant, for SImode shifts as extract.
5264 e Print 64 - constant, for DImode rotates.
5265 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5266 a floating point register emitted normally.
5267 G A floating point constant.
5268 I Invert a predicate register by adding 1.
5269 J Select the proper predicate register for a condition.
5270 j Select the inverse predicate register for a condition.
5271 O Append .acq for volatile load.
5272 P Postincrement of a MEM.
5273 Q Append .rel for volatile store.
5274 R Print .s .d or nothing for a single, double or no truncation.
5275 S Shift amount for shladd instruction.
5276 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5277 for Intel assembler.
5278 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5279 for Intel assembler.
5280 X A pair of floating point registers.
5281 r Print register name, or constant 0 as r0. HP compatibility for
5282 Linux kernel.
5283 v Print vector constant value as an 8-byte integer value. */
5285 static void
5286 ia64_print_operand (FILE * file, rtx x, int code)
5288 const char *str;
5290 switch (code)
5292 case 0:
5293 /* Handled below. */
5294 break;
5296 case 'C':
5298 enum rtx_code c = swap_condition (GET_CODE (x));
5299 fputs (GET_RTX_NAME (c), file);
5300 return;
5303 case 'D':
5304 switch (GET_CODE (x))
5306 case NE:
5307 str = "neq";
5308 break;
5309 case UNORDERED:
5310 str = "unord";
5311 break;
5312 case ORDERED:
5313 str = "ord";
5314 break;
5315 case UNLT:
5316 str = "nge";
5317 break;
5318 case UNLE:
5319 str = "ngt";
5320 break;
5321 case UNGT:
5322 str = "nle";
5323 break;
5324 case UNGE:
5325 str = "nlt";
5326 break;
5327 case UNEQ:
5328 case LTGT:
5329 gcc_unreachable ();
5330 default:
5331 str = GET_RTX_NAME (GET_CODE (x));
5332 break;
5334 fputs (str, file);
5335 return;
5337 case 'E':
5338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5339 return;
5341 case 'e':
5342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5343 return;
5345 case 'F':
5346 if (x == CONST0_RTX (GET_MODE (x)))
5347 str = reg_names [FR_REG (0)];
5348 else if (x == CONST1_RTX (GET_MODE (x)))
5349 str = reg_names [FR_REG (1)];
5350 else
5352 gcc_assert (GET_CODE (x) == REG);
5353 str = reg_names [REGNO (x)];
5355 fputs (str, file);
5356 return;
5358 case 'G':
5360 long val[4];
5361 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5362 if (GET_MODE (x) == SFmode)
5363 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5364 else if (GET_MODE (x) == DFmode)
5365 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5366 & 0xffffffff,
5367 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5368 & 0xffffffff);
5369 else
5370 output_operand_lossage ("invalid %%G mode");
5372 return;
5374 case 'I':
5375 fputs (reg_names [REGNO (x) + 1], file);
5376 return;
5378 case 'J':
5379 case 'j':
5381 unsigned int regno = REGNO (XEXP (x, 0));
5382 if (GET_CODE (x) == EQ)
5383 regno += 1;
5384 if (code == 'j')
5385 regno ^= 1;
5386 fputs (reg_names [regno], file);
5388 return;
5390 case 'O':
5391 if (MEM_VOLATILE_P (x))
5392 fputs(".acq", file);
5393 return;
5395 case 'P':
5397 HOST_WIDE_INT value;
5399 switch (GET_CODE (XEXP (x, 0)))
5401 default:
5402 return;
5404 case POST_MODIFY:
5405 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5406 if (GET_CODE (x) == CONST_INT)
5407 value = INTVAL (x);
5408 else
5410 gcc_assert (GET_CODE (x) == REG);
5411 fprintf (file, ", %s", reg_names[REGNO (x)]);
5412 return;
5414 break;
5416 case POST_INC:
5417 value = GET_MODE_SIZE (GET_MODE (x));
5418 break;
5420 case POST_DEC:
5421 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5422 break;
5425 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5426 return;
5429 case 'Q':
5430 if (MEM_VOLATILE_P (x))
5431 fputs(".rel", file);
5432 return;
5434 case 'R':
5435 if (x == CONST0_RTX (GET_MODE (x)))
5436 fputs(".s", file);
5437 else if (x == CONST1_RTX (GET_MODE (x)))
5438 fputs(".d", file);
5439 else if (x == CONST2_RTX (GET_MODE (x)))
5441 else
5442 output_operand_lossage ("invalid %%R value");
5443 return;
5445 case 'S':
5446 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5447 return;
5449 case 'T':
5450 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5452 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5453 return;
5455 break;
5457 case 'U':
5458 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5460 const char *prefix = "0x";
5461 if (INTVAL (x) & 0x80000000)
5463 fprintf (file, "0xffffffff");
5464 prefix = "";
5466 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5467 return;
5469 break;
5471 case 'X':
5473 unsigned int regno = REGNO (x);
5474 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5476 return;
5478 case 'r':
5479 /* If this operand is the constant zero, write it as register zero.
5480 Any register, zero, or CONST_INT value is OK here. */
5481 if (GET_CODE (x) == REG)
5482 fputs (reg_names[REGNO (x)], file);
5483 else if (x == CONST0_RTX (GET_MODE (x)))
5484 fputs ("r0", file);
5485 else if (GET_CODE (x) == CONST_INT)
5486 output_addr_const (file, x);
5487 else
5488 output_operand_lossage ("invalid %%r value");
5489 return;
5491 case 'v':
5492 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5493 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5494 break;
5496 case '+':
5498 const char *which;
5500 /* For conditional branches, returns or calls, substitute
5501 sptk, dptk, dpnt, or spnt for %s. */
5502 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5503 if (x)
5505 int pred_val = XINT (x, 0);
5507 /* Guess top and bottom 10% statically predicted. */
5508 if (pred_val < REG_BR_PROB_BASE / 50
5509 && br_prob_note_reliable_p (x))
5510 which = ".spnt";
5511 else if (pred_val < REG_BR_PROB_BASE / 2)
5512 which = ".dpnt";
5513 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5514 || !br_prob_note_reliable_p (x))
5515 which = ".dptk";
5516 else
5517 which = ".sptk";
5519 else if (CALL_P (current_output_insn))
5520 which = ".sptk";
5521 else
5522 which = ".dptk";
5524 fputs (which, file);
5525 return;
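/* Example: a branch with a reliable REG_BR_PROB note above 98% of
   REG_BR_PROB_BASE prints ".sptk"; a reliable 40% note prints ".dpnt".  */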
5528 case ',':
5529 x = current_insn_predicate;
5530 if (x)
5532 unsigned int regno = REGNO (XEXP (x, 0));
5533 if (GET_CODE (x) == EQ)
5534 regno += 1;
5535 fprintf (file, "(%s) ", reg_names [regno]);
5537 return;
5539 default:
5540 output_operand_lossage ("ia64_print_operand: unknown code");
5541 return;
5544 switch (GET_CODE (x))
5546 /* This happens for the spill/restore instructions. */
5547 case POST_INC:
5548 case POST_DEC:
5549 case POST_MODIFY:
5550 x = XEXP (x, 0);
5551 /* fall through */
5553 case REG:
5554 fputs (reg_names [REGNO (x)], file);
5555 break;
5557 case MEM:
5559 rtx addr = XEXP (x, 0);
5560 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5561 addr = XEXP (addr, 0);
5562 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5563 break;
5566 default:
5567 output_addr_const (file, x);
5568 break;
5571 return;
5574 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5576 static bool
5577 ia64_print_operand_punct_valid_p (unsigned char code)
5579 return (code == '+' || code == ',');
5582 /* Compute a (partial) cost for rtx X. Return true if the complete
5583 cost has been computed, and false if subexpressions should be
5584 scanned. In either case, *TOTAL contains the cost result. */
5585 /* ??? This is incomplete. */
5587 static bool
5588 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5589 int opno ATTRIBUTE_UNUSED,
5590 int *total, bool speed ATTRIBUTE_UNUSED)
5592 int code = GET_CODE (x);
5594 switch (code)
5596 case CONST_INT:
5597 switch (outer_code)
5599 case SET:
5600 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5601 return true;
5602 case PLUS:
5603 if (satisfies_constraint_I (x))
5604 *total = 0;
5605 else if (satisfies_constraint_J (x))
5606 *total = 1;
5607 else
5608 *total = COSTS_N_INSNS (1);
5609 return true;
5610 default:
5611 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5612 *total = 0;
5613 else
5614 *total = COSTS_N_INSNS (1);
5615 return true;
5618 case CONST_DOUBLE:
5619 *total = COSTS_N_INSNS (1);
5620 return true;
5622 case CONST:
5623 case SYMBOL_REF:
5624 case LABEL_REF:
5625 *total = COSTS_N_INSNS (3);
5626 return true;
5628 case FMA:
5629 *total = COSTS_N_INSNS (4);
5630 return true;
5632 case MULT:
5633 /* For multiplies wider than HImode, we have to go to the FPU,
5634 which normally involves copies. Plus there's the latency
5635 of the multiply itself, and the latency of the instructions to
5636 transfer integer regs to FP regs. */
5637 if (FLOAT_MODE_P (mode))
5638 *total = COSTS_N_INSNS (4);
5639 else if (GET_MODE_SIZE (mode) > 2)
5640 *total = COSTS_N_INSNS (10);
5641 else
5642 *total = COSTS_N_INSNS (2);
5643 return true;
5645 case PLUS:
5646 case MINUS:
5647 if (FLOAT_MODE_P (mode))
5649 *total = COSTS_N_INSNS (4);
5650 return true;
5652 /* FALLTHRU */
5654 case ASHIFT:
5655 case ASHIFTRT:
5656 case LSHIFTRT:
5657 *total = COSTS_N_INSNS (1);
5658 return true;
5660 case DIV:
5661 case UDIV:
5662 case MOD:
5663 case UMOD:
5664 /* We make divide expensive, so that divide-by-constant will be
5665 optimized to a multiply. */
5666 *total = COSTS_N_INSNS (60);
5667 return true;
5669 default:
5670 return false;
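/* Illustrative consequences of the cases above: a DImode integer multiply
   is costed at COSTS_N_INSNS (10) to reflect the GR<->FR transfers around
   the FP multiplier, a floating-point multiply at COSTS_N_INSNS (4), and
   any integer divide or modulus at COSTS_N_INSNS (60), which is what steers
   division by a constant toward a multiply-based sequence.  */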
5674 /* Calculate the cost of moving data from a register in class FROM to
5675 one in class TO, using MODE. */
5677 static int
5678 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5679 reg_class_t to)
5681 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5682 if (to == ADDL_REGS)
5683 to = GR_REGS;
5684 if (from == ADDL_REGS)
5685 from = GR_REGS;
5687 /* All costs are symmetric, so reduce cases by putting the
5688 lower number class as the destination. */
5689 if (from < to)
5691 reg_class_t tmp = to;
5692 to = from, from = tmp;
5695 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5696 so that we get secondary memory reloads. Between FR_REGS,
5697 we have to make this at least as expensive as memory_move_cost
5698 to avoid spectacularly poor register class preferencing. */
5699 if (mode == XFmode || mode == RFmode)
5701 if (to != GR_REGS || from != GR_REGS)
5702 return memory_move_cost (mode, to, false);
5703 else
5704 return 3;
5707 switch (to)
5709 case PR_REGS:
5710 /* Moving between PR registers takes two insns. */
5711 if (from == PR_REGS)
5712 return 3;
5713 /* Moving between PR and anything but GR is impossible. */
5714 if (from != GR_REGS)
5715 return memory_move_cost (mode, to, false);
5716 break;
5718 case BR_REGS:
5719 /* Moving between BR and anything but GR is impossible. */
5720 if (from != GR_REGS && from != GR_AND_BR_REGS)
5721 return memory_move_cost (mode, to, false);
5722 break;
5724 case AR_I_REGS:
5725 case AR_M_REGS:
5726 /* Moving between AR and anything but GR is impossible. */
5727 if (from != GR_REGS)
5728 return memory_move_cost (mode, to, false);
5729 break;
5731 case GR_REGS:
5732 case FR_REGS:
5733 case FP_REGS:
5734 case GR_AND_FR_REGS:
5735 case GR_AND_BR_REGS:
5736 case ALL_REGS:
5737 break;
5739 default:
5740 gcc_unreachable ();
5743 return 2;
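/* A few illustrative values produced by the code above: an ordinary
   GR<->GR move costs 2, a PR<->PR move costs 3 (it takes two insns), and
   an XFmode copy between FR_REGS and GR_REGS is charged the full
   memory-move cost so that a secondary memory reload is used instead.  */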
5746 /* Calculate the cost of moving data of MODE from a register to or from
5747 memory. */
5749 static int
5750 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5751 reg_class_t rclass,
5752 bool in ATTRIBUTE_UNUSED)
5754 if (rclass == GENERAL_REGS
5755 || rclass == FR_REGS
5756 || rclass == FP_REGS
5757 || rclass == GR_AND_FR_REGS)
5758 return 4;
5759 else
5760 return 10;
5763 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5764 on RCLASS to use when copying X into that class. */
5766 static reg_class_t
5767 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5769 switch (rclass)
5771 case FR_REGS:
5772 case FP_REGS:
5773 /* Don't allow volatile mem reloads into floating point registers.
5774 This is defined to force reload to choose the r/m case instead
5775 of the f/f case when reloading (set (reg fX) (mem/v)). */
5776 if (MEM_P (x) && MEM_VOLATILE_P (x))
5777 return NO_REGS;
5779 /* Force all unrecognized constants into the constant pool. */
5780 if (CONSTANT_P (x))
5781 return NO_REGS;
5782 break;
5784 case AR_M_REGS:
5785 case AR_I_REGS:
5786 if (!OBJECT_P (x))
5787 return NO_REGS;
5788 break;
5790 default:
5791 break;
5794 return rclass;
5797 /* This function returns the register class required for a secondary
5798 register when copying between one of the registers in RCLASS, and X,
5799 using MODE. A return value of NO_REGS means that no secondary register
5800 is required. */
5802 enum reg_class
5803 ia64_secondary_reload_class (enum reg_class rclass,
5804 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5806 int regno = -1;
5808 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5809 regno = true_regnum (x);
5811 switch (rclass)
5813 case BR_REGS:
5814 case AR_M_REGS:
5815 case AR_I_REGS:
5816 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5817 interaction. We end up with two pseudos with overlapping lifetimes
5818 both of which are equiv to the same constant, and both of which need
5819 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5820 changes depending on the path length, which means the qty_first_reg
5821 check in make_regs_eqv can give different answers at different times.
5822 At some point I'll probably need a reload_indi pattern to handle
5823 this.
5825 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5826 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5827 non-general registers for good measure. */
5828 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5829 return GR_REGS;
5831 /* This is needed if a pseudo used as a call_operand gets spilled to a
5832 stack slot. */
5833 if (GET_CODE (x) == MEM)
5834 return GR_REGS;
5835 break;
5837 case FR_REGS:
5838 case FP_REGS:
5839 /* Need to go through general registers to get to other class regs. */
5840 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5841 return GR_REGS;
5843 /* This can happen when a paradoxical subreg is an operand to the
5844 muldi3 pattern. */
5845 /* ??? This shouldn't be necessary after instruction scheduling is
5846 enabled, because paradoxical subregs are not accepted by
5847 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5848 stop the paradoxical subreg stupidity in the *_operand functions
5849 in recog.c. */
5850 if (GET_CODE (x) == MEM
5851 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5852 || GET_MODE (x) == QImode))
5853 return GR_REGS;
5855 /* This can happen because of the ior/and/etc patterns that accept FP
5856 registers as operands. If the third operand is a constant, then it
5857 needs to be reloaded into a FP register. */
5858 if (GET_CODE (x) == CONST_INT)
5859 return GR_REGS;
5861 /* This can happen because of register elimination in a muldi3 insn.
5862 E.g. `26107 * (unsigned long)&u'. */
5863 if (GET_CODE (x) == PLUS)
5864 return GR_REGS;
5865 break;
5867 case PR_REGS:
5868 /* ??? This happens if we cse/gcse a BImode value across a call,
5869 and the function has a nonlocal goto. This is because global
5870 does not allocate call crossing pseudos to hard registers when
5871 crtl->has_nonlocal_goto is true. This is relatively
5872 common for C++ programs that use exceptions. To reproduce,
5873 return NO_REGS and compile libstdc++. */
5874 if (GET_CODE (x) == MEM)
5875 return GR_REGS;
5877 /* This can happen when we take a BImode subreg of a DImode value,
5878 and that DImode value winds up in some non-GR register. */
5879 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5880 return GR_REGS;
5881 break;
5883 default:
5884 break;
5887 return NO_REGS;
5891 /* Implement targetm.unspec_may_trap_p hook. */
5892 static int
5893 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5895 switch (XINT (x, 1))
5897 case UNSPEC_LDA:
5898 case UNSPEC_LDS:
5899 case UNSPEC_LDSA:
5900 case UNSPEC_LDCCLR:
5901 case UNSPEC_CHKACLR:
5902 case UNSPEC_CHKS:
5903 /* These unspecs are just wrappers. */
5904 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5907 return default_unspec_may_trap_p (x, flags);
5911 /* Parse the -mfixed-range= option string. */
5913 static void
5914 fix_range (const char *const_str)
5916 int i, first, last;
5917 char *str, *dash, *comma;
5919 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
5920 REG2 are either register names or register numbers. The effect
5921 of this option is to mark the registers in the range from REG1 to
5922 REG2 as ``fixed'' so they won't be used by the compiler. This is
5923 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
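/* For example, -mfixed-range=f32-f127 marks f32 through f127 as fixed and
   call-used, and -mfixed-range=f32-f63,f96-f127 does the same for two
   disjoint ranges.  */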
5925 i = strlen (const_str);
5926 str = (char *) alloca (i + 1);
5927 memcpy (str, const_str, i + 1);
5929 while (1)
5931 dash = strchr (str, '-');
5932 if (!dash)
5934 warning (0, "value of -mfixed-range must have form REG1-REG2");
5935 return;
5937 *dash = '\0';
5939 comma = strchr (dash + 1, ',');
5940 if (comma)
5941 *comma = '\0';
5943 first = decode_reg_name (str);
5944 if (first < 0)
5946 warning (0, "unknown register name: %s", str);
5947 return;
5950 last = decode_reg_name (dash + 1);
5951 if (last < 0)
5953 warning (0, "unknown register name: %s", dash + 1);
5954 return;
5957 *dash = '-';
5959 if (first > last)
5961 warning (0, "%s-%s is an empty range", str, dash + 1);
5962 return;
5965 for (i = first; i <= last; ++i)
5966 fixed_regs[i] = call_used_regs[i] = 1;
5968 if (!comma)
5969 break;
5971 *comma = ',';
5972 str = comma + 1;
5976 /* Implement TARGET_OPTION_OVERRIDE. */
5978 static void
5979 ia64_option_override (void)
5981 unsigned int i;
5982 cl_deferred_option *opt;
5983 vec<cl_deferred_option> *v
5984 = (vec<cl_deferred_option> *) ia64_deferred_options;
5986 if (v)
5987 FOR_EACH_VEC_ELT (*v, i, opt)
5989 switch (opt->opt_index)
5991 case OPT_mfixed_range_:
5992 fix_range (opt->arg);
5993 break;
5995 default:
5996 gcc_unreachable ();
6000 if (TARGET_AUTO_PIC)
6001 target_flags |= MASK_CONST_GP;
6003 /* Numerous experiments show that IRA-based loop pressure
6004 calculation works better for RTL loop invariant motion on targets
6005 with enough (>= 32) registers. It is an expensive optimization,
6006 so it is enabled only for peak performance. */
6007 if (optimize >= 3)
6008 flag_ira_loop_pressure = 1;
6011 ia64_section_threshold = (global_options_set.x_g_switch_value
6012 ? g_switch_value
6013 : IA64_DEFAULT_GVALUE);
6015 init_machine_status = ia64_init_machine_status;
6017 if (align_functions <= 0)
6018 align_functions = 64;
6019 if (align_loops <= 0)
6020 align_loops = 32;
6021 if (TARGET_ABI_OPEN_VMS)
6022 flag_no_common = 1;
6024 ia64_override_options_after_change();
6027 /* Implement targetm.override_options_after_change. */
6029 static void
6030 ia64_override_options_after_change (void)
6032 if (optimize >= 3
6033 && !global_options_set.x_flag_selective_scheduling
6034 && !global_options_set.x_flag_selective_scheduling2)
6036 flag_selective_scheduling2 = 1;
6037 flag_sel_sched_pipelining = 1;
6039 if (mflag_sched_control_spec == 2)
6041 /* Control speculation is on by default for the selective scheduler,
6042 but not for the Haifa scheduler. */
6043 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6045 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6047 /* FIXME: remove this once breaking auto-inc insns apart is
6048 implemented as a transformation. */
6049 flag_auto_inc_dec = 0;
6053 /* Initialize the record of emitted frame related registers. */
6055 void ia64_init_expanders (void)
6057 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6060 static struct machine_function *
6061 ia64_init_machine_status (void)
6063 return ggc_cleared_alloc<machine_function> ();
6066 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6067 static enum attr_type ia64_safe_type (rtx_insn *);
6069 static enum attr_itanium_class
6070 ia64_safe_itanium_class (rtx_insn *insn)
6072 if (recog_memoized (insn) >= 0)
6073 return get_attr_itanium_class (insn);
6074 else if (DEBUG_INSN_P (insn))
6075 return ITANIUM_CLASS_IGNORE;
6076 else
6077 return ITANIUM_CLASS_UNKNOWN;
6080 static enum attr_type
6081 ia64_safe_type (rtx_insn *insn)
6083 if (recog_memoized (insn) >= 0)
6084 return get_attr_type (insn);
6085 else
6086 return TYPE_UNKNOWN;
6089 /* The following collection of routines emit instruction group stop bits as
6090 necessary to avoid dependencies. */
6092 /* Need to track some additional registers as far as serialization is
6093 concerned so we can properly handle br.call and br.ret. We could
6094 make these registers visible to gcc, but since these registers are
6095 never explicitly used in gcc generated code, it seems wasteful to
6096 do so (plus it would make the call and return patterns needlessly
6097 complex). */
6098 #define REG_RP (BR_REG (0))
6099 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6100 /* This is used for volatile asms which may require a stop bit immediately
6101 before and after them. */
6102 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6103 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6104 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6106 /* For each register, we keep track of how it has been written in the
6107 current instruction group.
6109 If a register is written unconditionally (no qualifying predicate),
6110 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6112 If a register is written if its qualifying predicate P is true, we
6113 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6114 may be written again by the complement of P (P^1) and when this happens,
6115 WRITE_COUNT gets set to 2.
6117 The result of this is that whenever an insn attempts to write a register
6118 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6120 If a predicate register is written by a floating-point insn, we set
6121 WRITTEN_BY_FP to true.
6123 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6124 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
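/* Concretely: an unconditional write to a register takes its WRITE_COUNT
   straight to 2, while a write guarded by, say, p6 sets WRITE_COUNT to 1
   and records p6 in FIRST_PRED; once WRITE_COUNT reaches 2, any further
   write to that register within the group forces a stop bit.  */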
6126 #if GCC_VERSION >= 4000
6127 #define RWS_FIELD_TYPE __extension__ unsigned short
6128 #else
6129 #define RWS_FIELD_TYPE unsigned int
6130 #endif
6131 struct reg_write_state
6133 RWS_FIELD_TYPE write_count : 2;
6134 RWS_FIELD_TYPE first_pred : 10;
6135 RWS_FIELD_TYPE written_by_fp : 1;
6136 RWS_FIELD_TYPE written_by_and : 1;
6137 RWS_FIELD_TYPE written_by_or : 1;
6140 /* Cumulative info for the current instruction group. */
6141 struct reg_write_state rws_sum[NUM_REGS];
6142 #if CHECKING_P
6143 /* Bitmap whether a register has been written in the current insn. */
6144 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6145 / HOST_BITS_PER_WIDEST_FAST_INT];
6147 static inline void
6148 rws_insn_set (int regno)
6150 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6151 SET_HARD_REG_BIT (rws_insn, regno);
6154 static inline int
6155 rws_insn_test (int regno)
6157 return TEST_HARD_REG_BIT (rws_insn, regno);
6159 #else
6160 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6161 unsigned char rws_insn[2];
6163 static inline void
6164 rws_insn_set (int regno)
6166 if (regno == REG_AR_CFM)
6167 rws_insn[0] = 1;
6168 else if (regno == REG_VOLATILE)
6169 rws_insn[1] = 1;
6172 static inline int
6173 rws_insn_test (int regno)
6175 if (regno == REG_AR_CFM)
6176 return rws_insn[0];
6177 if (regno == REG_VOLATILE)
6178 return rws_insn[1];
6179 return 0;
6181 #endif
6183 /* Indicates whether this is the first instruction after a stop bit,
6184 in which case we don't need another stop bit. Without this,
6185 ia64_variable_issue will die when scheduling an alloc. */
6186 static int first_instruction;
6188 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6189 RTL for one instruction. */
6190 struct reg_flags
6192 unsigned int is_write : 1; /* Is register being written? */
6193 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6194 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6195 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6196 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6197 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6200 static void rws_update (int, struct reg_flags, int);
6201 static int rws_access_regno (int, struct reg_flags, int);
6202 static int rws_access_reg (rtx, struct reg_flags, int);
6203 static void update_set_flags (rtx, struct reg_flags *);
6204 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6205 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6206 static void init_insn_group_barriers (void);
6207 static int group_barrier_needed (rtx_insn *);
6208 static int safe_group_barrier_needed (rtx_insn *);
6209 static int in_safe_group_barrier;
6211 /* Update *RWS for REGNO, which is being written by the current instruction,
6212 with predicate PRED, and associated register flags in FLAGS. */
6214 static void
6215 rws_update (int regno, struct reg_flags flags, int pred)
6217 if (pred)
6218 rws_sum[regno].write_count++;
6219 else
6220 rws_sum[regno].write_count = 2;
6221 rws_sum[regno].written_by_fp |= flags.is_fp;
6222 /* ??? Not tracking and/or across differing predicates. */
6223 rws_sum[regno].written_by_and = flags.is_and;
6224 rws_sum[regno].written_by_or = flags.is_or;
6225 rws_sum[regno].first_pred = pred;
6228 /* Handle an access to register REGNO of type FLAGS using predicate register
6229 PRED. Update rws_sum array. Return 1 if this access creates
6230 a dependency with an earlier instruction in the same group. */
6232 static int
6233 rws_access_regno (int regno, struct reg_flags flags, int pred)
6235 int need_barrier = 0;
6237 gcc_assert (regno < NUM_REGS);
6239 if (! PR_REGNO_P (regno))
6240 flags.is_and = flags.is_or = 0;
6242 if (flags.is_write)
6244 int write_count;
6246 rws_insn_set (regno);
6247 write_count = rws_sum[regno].write_count;
6249 switch (write_count)
6251 case 0:
6252 /* The register has not been written yet. */
6253 if (!in_safe_group_barrier)
6254 rws_update (regno, flags, pred);
6255 break;
6257 case 1:
6258 /* The register has been written via a predicate. Treat
6259 it like an unconditional write and do not try to check
6260 for a complementary predicate register in the earlier write. */
6261 if (flags.is_and && rws_sum[regno].written_by_and)
6263 else if (flags.is_or && rws_sum[regno].written_by_or)
6265 else
6266 need_barrier = 1;
6267 if (!in_safe_group_barrier)
6268 rws_update (regno, flags, pred);
6269 break;
6271 case 2:
6272 /* The register has been unconditionally written already. We
6273 need a barrier. */
6274 if (flags.is_and && rws_sum[regno].written_by_and)
6276 else if (flags.is_or && rws_sum[regno].written_by_or)
6278 else
6279 need_barrier = 1;
6280 if (!in_safe_group_barrier)
6282 rws_sum[regno].written_by_and = flags.is_and;
6283 rws_sum[regno].written_by_or = flags.is_or;
6285 break;
6287 default:
6288 gcc_unreachable ();
6291 else
6293 if (flags.is_branch)
6295 /* Branches have several RAW exceptions that allow us to avoid
6296 barriers. */
6298 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6299 /* RAW dependencies on branch regs are permissible as long
6300 as the writer is a non-branch instruction. Since we
6301 never generate code that uses a branch register written
6302 by a branch instruction, handling this case is
6303 easy. */
6304 return 0;
6306 if (REGNO_REG_CLASS (regno) == PR_REGS
6307 && ! rws_sum[regno].written_by_fp)
6308 /* The predicates of a branch are available within the
6309 same insn group as long as the predicate was written by
6310 something other than a floating-point instruction. */
6311 return 0;
6314 if (flags.is_and && rws_sum[regno].written_by_and)
6315 return 0;
6316 if (flags.is_or && rws_sum[regno].written_by_or)
6317 return 0;
6319 switch (rws_sum[regno].write_count)
6321 case 0:
6322 /* The register has not been written yet. */
6323 break;
6325 case 1:
6326 /* The register has been written via a predicate; assume we
6327 need a barrier (don't check for complementary regs). */
6328 need_barrier = 1;
6329 break;
6331 case 2:
6332 /* The register has been unconditionally written already. We
6333 need a barrier. */
6334 need_barrier = 1;
6335 break;
6337 default:
6338 gcc_unreachable ();
6342 return need_barrier;
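/* A minimal (hypothetical) usage sketch of the state machine above, using
   hard register number 14 (r14) and no qualifying predicate:

     struct reg_flags f = { 0 };
     f.is_write = 1;
     init_insn_group_barriers ();
     memset (rws_insn, 0, sizeof (rws_insn));
     rws_access_regno (14, f, 0);   -- first write in the group: returns 0
     memset (rws_insn, 0, sizeof (rws_insn));   -- start of the next insn
     rws_access_regno (14, f, 0);   -- second write in the group: returns 1

   group_barrier_needed below performs the per-insn memset and drives these
   calls through rtx_needs_barrier automatically.  */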
6345 static int
6346 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6348 int regno = REGNO (reg);
6349 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6351 if (n == 1)
6352 return rws_access_regno (regno, flags, pred);
6353 else
6355 int need_barrier = 0;
6356 while (--n >= 0)
6357 need_barrier |= rws_access_regno (regno + n, flags, pred);
6358 return need_barrier;
6362 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
6363 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6365 static void
6366 update_set_flags (rtx x, struct reg_flags *pflags)
6368 rtx src = SET_SRC (x);
6370 switch (GET_CODE (src))
6372 case CALL:
6373 return;
6375 case IF_THEN_ELSE:
6376 /* There are four cases here:
6377 (1) The destination is (pc), in which case this is a branch,
6378 nothing here applies.
6379 (2) The destination is ar.lc, in which case this is a
6380 doloop_end_internal,
6381 (3) The destination is an fp register, in which case this is
6382 an fselect instruction.
6383 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6384 this is a check load.
6385 In all cases, nothing we do in this function applies. */
6386 return;
6388 default:
6389 if (COMPARISON_P (src)
6390 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6391 /* Set pflags->is_fp to 1 so that we know we're dealing
6392 with a floating point comparison when processing the
6393 destination of the SET. */
6394 pflags->is_fp = 1;
6396 /* Discover if this is a parallel comparison. We only handle
6397 and.orcm and or.andcm at present, since we must retain a
6398 strict inverse on the predicate pair. */
6399 else if (GET_CODE (src) == AND)
6400 pflags->is_and = 1;
6401 else if (GET_CODE (src) == IOR)
6402 pflags->is_or = 1;
6404 break;
6408 /* Subroutine of rtx_needs_barrier; this function determines whether the
6409 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6410 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6411 for this insn. */
6413 static int
6414 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6416 int need_barrier = 0;
6417 rtx dst;
6418 rtx src = SET_SRC (x);
6420 if (GET_CODE (src) == CALL)
6421 /* We don't need to worry about the result registers that
6422 get written by subroutine call. */
6423 return rtx_needs_barrier (src, flags, pred);
6424 else if (SET_DEST (x) == pc_rtx)
6426 /* X is a conditional branch. */
6427 /* ??? This seems redundant, as the caller sets this bit for
6428 all JUMP_INSNs. */
6429 if (!ia64_spec_check_src_p (src))
6430 flags.is_branch = 1;
6431 return rtx_needs_barrier (src, flags, pred);
6434 if (ia64_spec_check_src_p (src))
6435 /* Avoid checking one register twice (in condition
6436 and in 'then' section) for ldc pattern. */
6438 gcc_assert (REG_P (XEXP (src, 2)));
6439 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6441 /* We process MEM below. */
6442 src = XEXP (src, 1);
6445 need_barrier |= rtx_needs_barrier (src, flags, pred);
6447 dst = SET_DEST (x);
6448 if (GET_CODE (dst) == ZERO_EXTRACT)
6450 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6451 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6453 return need_barrier;
6456 /* Handle an access to rtx X of type FLAGS using predicate register
6457 PRED. Return 1 if this access creates a dependency with an earlier
6458 instruction in the same group. */
6460 static int
6461 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6463 int i, j;
6464 int is_complemented = 0;
6465 int need_barrier = 0;
6466 const char *format_ptr;
6467 struct reg_flags new_flags;
6468 rtx cond;
6470 if (! x)
6471 return 0;
6473 new_flags = flags;
6475 switch (GET_CODE (x))
6477 case SET:
6478 update_set_flags (x, &new_flags);
6479 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6480 if (GET_CODE (SET_SRC (x)) != CALL)
6482 new_flags.is_write = 1;
6483 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6485 break;
6487 case CALL:
6488 new_flags.is_write = 0;
6489 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6491 /* Avoid multiple register writes, in case this is a pattern with
6492 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6493 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6495 new_flags.is_write = 1;
6496 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6497 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6498 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6500 break;
6502 case COND_EXEC:
6503 /* X is a predicated instruction. */
6505 cond = COND_EXEC_TEST (x);
6506 gcc_assert (!pred);
6507 need_barrier = rtx_needs_barrier (cond, flags, 0);
6509 if (GET_CODE (cond) == EQ)
6510 is_complemented = 1;
6511 cond = XEXP (cond, 0);
6512 gcc_assert (GET_CODE (cond) == REG
6513 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6514 pred = REGNO (cond);
6515 if (is_complemented)
6516 ++pred;
6518 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6519 return need_barrier;
6521 case CLOBBER:
6522 case USE:
6523 /* Clobber & use are for earlier compiler-phases only. */
6524 break;
6526 case ASM_OPERANDS:
6527 case ASM_INPUT:
6528 /* We always emit stop bits for traditional asms. We emit stop bits
6529 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6530 if (GET_CODE (x) != ASM_OPERANDS
6531 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6533 /* Avoid writing the register multiple times if we have multiple
6534 asm outputs. This avoids a failure in rws_access_reg. */
6535 if (! rws_insn_test (REG_VOLATILE))
6537 new_flags.is_write = 1;
6538 rws_access_regno (REG_VOLATILE, new_flags, pred);
6540 return 1;
6543 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6544 We cannot just fall through here since then we would be confused
6545 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
6546 usage, does not indicate a traditional asm. */
6548 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6549 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6550 need_barrier = 1;
6551 break;
6553 case PARALLEL:
6554 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6556 rtx pat = XVECEXP (x, 0, i);
6557 switch (GET_CODE (pat))
6559 case SET:
6560 update_set_flags (pat, &new_flags);
6561 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6562 break;
6564 case USE:
6565 case CALL:
6566 case ASM_OPERANDS:
6567 case ASM_INPUT:
6568 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6569 break;
6571 case CLOBBER:
6572 if (REG_P (XEXP (pat, 0))
6573 && extract_asm_operands (x) != NULL_RTX
6574 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6576 new_flags.is_write = 1;
6577 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6578 new_flags, pred);
6579 new_flags = flags;
6581 break;
6583 case RETURN:
6584 break;
6586 default:
6587 gcc_unreachable ();
6590 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6592 rtx pat = XVECEXP (x, 0, i);
6593 if (GET_CODE (pat) == SET)
6595 if (GET_CODE (SET_SRC (pat)) != CALL)
6597 new_flags.is_write = 1;
6598 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6599 pred);
6602 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6603 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6605 break;
6607 case SUBREG:
6608 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6609 break;
6610 case REG:
6611 if (REGNO (x) == AR_UNAT_REGNUM)
6613 for (i = 0; i < 64; ++i)
6614 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6616 else
6617 need_barrier = rws_access_reg (x, flags, pred);
6618 break;
6620 case MEM:
6621 /* Find the regs used in memory address computation. */
6622 new_flags.is_write = 0;
6623 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6624 break;
6626 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6627 case SYMBOL_REF: case LABEL_REF: case CONST:
6628 break;
6630 /* Operators with side-effects. */
6631 case POST_INC: case POST_DEC:
6632 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6634 new_flags.is_write = 0;
6635 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6636 new_flags.is_write = 1;
6637 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6638 break;
6640 case POST_MODIFY:
6641 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6643 new_flags.is_write = 0;
6644 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6645 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6646 new_flags.is_write = 1;
6647 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6648 break;
6650 /* Handle common unary and binary ops for efficiency. */
6651 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6652 case MOD: case UDIV: case UMOD: case AND: case IOR:
6653 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6654 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6655 case NE: case EQ: case GE: case GT: case LE:
6656 case LT: case GEU: case GTU: case LEU: case LTU:
6657 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6658 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6659 break;
6661 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6662 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6663 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6664 case SQRT: case FFS: case POPCOUNT:
6665 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6666 break;
6668 case VEC_SELECT:
6669 /* VEC_SELECT's second argument is a PARALLEL with integers that
6670 describe the elements selected. On ia64, those integers are
6671 always constants. Avoid walking the PARALLEL so that we don't
6672 get confused with "normal" parallels and then die. */
6673 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6674 break;
6676 case UNSPEC:
6677 switch (XINT (x, 1))
6679 case UNSPEC_LTOFF_DTPMOD:
6680 case UNSPEC_LTOFF_DTPREL:
6681 case UNSPEC_DTPREL:
6682 case UNSPEC_LTOFF_TPREL:
6683 case UNSPEC_TPREL:
6684 case UNSPEC_PRED_REL_MUTEX:
6685 case UNSPEC_PIC_CALL:
6686 case UNSPEC_MF:
6687 case UNSPEC_FETCHADD_ACQ:
6688 case UNSPEC_FETCHADD_REL:
6689 case UNSPEC_BSP_VALUE:
6690 case UNSPEC_FLUSHRS:
6691 case UNSPEC_BUNDLE_SELECTOR:
6692 break;
6694 case UNSPEC_GR_SPILL:
6695 case UNSPEC_GR_RESTORE:
6697 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6698 HOST_WIDE_INT bit = (offset >> 3) & 63;
6700 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6701 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6702 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6703 new_flags, pred);
6704 break;
6707 case UNSPEC_FR_SPILL:
6708 case UNSPEC_FR_RESTORE:
6709 case UNSPEC_GETF_EXP:
6710 case UNSPEC_SETF_EXP:
6711 case UNSPEC_ADDP4:
6712 case UNSPEC_FR_SQRT_RECIP_APPROX:
6713 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6714 case UNSPEC_LDA:
6715 case UNSPEC_LDS:
6716 case UNSPEC_LDS_A:
6717 case UNSPEC_LDSA:
6718 case UNSPEC_CHKACLR:
6719 case UNSPEC_CHKS:
6720 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6721 break;
6723 case UNSPEC_FR_RECIP_APPROX:
6724 case UNSPEC_SHRP:
6725 case UNSPEC_COPYSIGN:
6726 case UNSPEC_FR_RECIP_APPROX_RES:
6727 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6728 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6729 break;
6731 case UNSPEC_CMPXCHG_ACQ:
6732 case UNSPEC_CMPXCHG_REL:
6733 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6734 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6735 break;
6737 default:
6738 gcc_unreachable ();
6740 break;
6742 case UNSPEC_VOLATILE:
6743 switch (XINT (x, 1))
6745 case UNSPECV_ALLOC:
6746 /* Alloc must always be the first instruction of a group.
6747 We force this by always returning true. */
6748 /* ??? We might get better scheduling if we explicitly check for
6749 input/local/output register dependencies, and modify the
6750 scheduler so that alloc is always reordered to the start of
6751 the current group. We could then eliminate all of the
6752 first_instruction code. */
6753 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6755 new_flags.is_write = 1;
6756 rws_access_regno (REG_AR_CFM, new_flags, pred);
6757 return 1;
6759 case UNSPECV_SET_BSP:
6760 case UNSPECV_PROBE_STACK_RANGE:
6761 need_barrier = 1;
6762 break;
6764 case UNSPECV_BLOCKAGE:
6765 case UNSPECV_INSN_GROUP_BARRIER:
6766 case UNSPECV_BREAK:
6767 case UNSPECV_PSAC_ALL:
6768 case UNSPECV_PSAC_NORMAL:
6769 return 0;
6771 case UNSPECV_PROBE_STACK_ADDRESS:
6772 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6773 break;
6775 default:
6776 gcc_unreachable ();
6778 break;
6780 case RETURN:
6781 new_flags.is_write = 0;
6782 need_barrier = rws_access_regno (REG_RP, flags, pred);
6783 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6785 new_flags.is_write = 1;
6786 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6787 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6788 break;
6790 default:
6791 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6792 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6793 switch (format_ptr[i])
6795 case '0': /* unused field */
6796 case 'i': /* integer */
6797 case 'n': /* note */
6798 case 'w': /* wide integer */
6799 case 's': /* pointer to string */
6800 case 'S': /* optional pointer to string */
6801 break;
6803 case 'e':
6804 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6805 need_barrier = 1;
6806 break;
6808 case 'E':
6809 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6810 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6811 need_barrier = 1;
6812 break;
6814 default:
6815 gcc_unreachable ();
6817 break;
6819 return need_barrier;
6822 /* Clear out the state for group_barrier_needed at the start of a
6823 sequence of insns. */
6825 static void
6826 init_insn_group_barriers (void)
6828 memset (rws_sum, 0, sizeof (rws_sum));
6829 first_instruction = 1;
6832 /* Given the current state, determine whether a group barrier (a stop bit) is
6833 necessary before INSN. Return nonzero if so. This modifies the state to
6834 include the effects of INSN as a side-effect. */
6836 static int
6837 group_barrier_needed (rtx_insn *insn)
6839 rtx pat;
6840 int need_barrier = 0;
6841 struct reg_flags flags;
6843 memset (&flags, 0, sizeof (flags));
6844 switch (GET_CODE (insn))
6846 case NOTE:
6847 case DEBUG_INSN:
6848 break;
6850 case BARRIER:
6851 /* A barrier doesn't imply an instruction group boundary. */
6852 break;
6854 case CODE_LABEL:
6855 memset (rws_insn, 0, sizeof (rws_insn));
6856 return 1;
6858 case CALL_INSN:
6859 flags.is_branch = 1;
6860 flags.is_sibcall = SIBLING_CALL_P (insn);
6861 memset (rws_insn, 0, sizeof (rws_insn));
6863 /* Don't bundle a call following another call. */
6864 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6866 need_barrier = 1;
6867 break;
6870 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6871 break;
6873 case JUMP_INSN:
6874 if (!ia64_spec_check_p (insn))
6875 flags.is_branch = 1;
6877 /* Don't bundle a jump following a call. */
6878 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6880 need_barrier = 1;
6881 break;
6883 /* FALLTHRU */
6885 case INSN:
6886 if (GET_CODE (PATTERN (insn)) == USE
6887 || GET_CODE (PATTERN (insn)) == CLOBBER)
6888 /* Don't care about USE and CLOBBER "insns"---those are used to
6889 indicate to the optimizer that it shouldn't get rid of
6890 certain operations. */
6891 break;
6893 pat = PATTERN (insn);
6895 /* Ug. Hack hacks hacked elsewhere. */
6896 switch (recog_memoized (insn))
6898 /* We play dependency tricks with the epilogue in order
6899 to get proper schedules. Undo this for dv analysis. */
6900 case CODE_FOR_epilogue_deallocate_stack:
6901 case CODE_FOR_prologue_allocate_stack:
6902 pat = XVECEXP (pat, 0, 0);
6903 break;
6905 /* The pattern we use for br.cloop confuses the code above.
6906 The second element of the vector is representative. */
6907 case CODE_FOR_doloop_end_internal:
6908 pat = XVECEXP (pat, 0, 1);
6909 break;
6911 /* Doesn't generate code. */
6912 case CODE_FOR_pred_rel_mutex:
6913 case CODE_FOR_prologue_use:
6914 return 0;
6916 default:
6917 break;
6920 memset (rws_insn, 0, sizeof (rws_insn));
6921 need_barrier = rtx_needs_barrier (pat, flags, 0);
6923 /* Check to see if the previous instruction was a volatile
6924 asm. */
6925 if (! need_barrier)
6926 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6928 break;
6930 default:
6931 gcc_unreachable ();
6934 if (first_instruction && important_for_bundling_p (insn))
6936 need_barrier = 0;
6937 first_instruction = 0;
6940 return need_barrier;
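/* For example, in a hypothetical group containing

     add r14 = r32, r33
     add r15 = r14, r34

   the second add reads r14, which the first add wrote in the same group,
   so group_barrier_needed reports that a stop bit (";;") must be emitted
   between them.  */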
6943 /* Like group_barrier_needed, but do not clobber the current state. */
6945 static int
6946 safe_group_barrier_needed (rtx_insn *insn)
6948 int saved_first_instruction;
6949 int t;
6951 saved_first_instruction = first_instruction;
6952 in_safe_group_barrier = 1;
6954 t = group_barrier_needed (insn);
6956 first_instruction = saved_first_instruction;
6957 in_safe_group_barrier = 0;
6959 return t;
6962 /* Scan the current function and insert stop bits as necessary to
6963 eliminate dependencies. This function assumes that a final
6964 instruction scheduling pass has been run which has already
6965 inserted most of the necessary stop bits. This function only
6966 inserts new ones at basic block boundaries, since these are
6967 invisible to the scheduler. */
6969 static void
6970 emit_insn_group_barriers (FILE *dump)
6972 rtx_insn *insn;
6973 rtx_insn *last_label = 0;
6974 int insns_since_last_label = 0;
6976 init_insn_group_barriers ();
6978 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6980 if (LABEL_P (insn))
6982 if (insns_since_last_label)
6983 last_label = insn;
6984 insns_since_last_label = 0;
6986 else if (NOTE_P (insn)
6987 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6989 if (insns_since_last_label)
6990 last_label = insn;
6991 insns_since_last_label = 0;
6993 else if (NONJUMP_INSN_P (insn)
6994 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6995 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6997 init_insn_group_barriers ();
6998 last_label = 0;
7000 else if (NONDEBUG_INSN_P (insn))
7002 insns_since_last_label = 1;
7004 if (group_barrier_needed (insn))
7006 if (last_label)
7008 if (dump)
7009 fprintf (dump, "Emitting stop before label %d\n",
7010 INSN_UID (last_label));
7011 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7012 insn = last_label;
7014 init_insn_group_barriers ();
7015 last_label = 0;
7022 /* Like emit_insn_group_barriers, but used when no final scheduling pass
7023 has been run. This function has to emit all necessary group barriers. */
7025 static void
7026 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7028 rtx_insn *insn;
7030 init_insn_group_barriers ();
7032 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7034 if (BARRIER_P (insn))
7036 rtx_insn *last = prev_active_insn (insn);
7038 if (! last)
7039 continue;
7040 if (JUMP_TABLE_DATA_P (last))
7041 last = prev_active_insn (last);
7042 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7043 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7045 init_insn_group_barriers ();
7047 else if (NONDEBUG_INSN_P (insn))
7049 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7050 init_insn_group_barriers ();
7051 else if (group_barrier_needed (insn))
7053 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7054 init_insn_group_barriers ();
7055 group_barrier_needed (insn);
7063 /* Instruction scheduling support. */
7065 #define NR_BUNDLES 10
7067 /* A list of names of all available bundles. */
7069 static const char *bundle_name [NR_BUNDLES] =
7071 ".mii",
7072 ".mmi",
7073 ".mfi",
7074 ".mmf",
7075 #if NR_BUNDLES == 10
7076 ".bbb",
7077 ".mbb",
7078 #endif
7079 ".mib",
7080 ".mmb",
7081 ".mfb",
7082 ".mlx"
7085 /* Nonzero if we should insert stop bits into the schedule. */
7087 int ia64_final_schedule = 0;
7089 /* Codes of the corresponding queried units: */
7091 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7092 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7094 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7095 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7097 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7099 /* The following variable value is an insn group barrier. */
7101 static rtx_insn *dfa_stop_insn;
7103 /* The following variable value is the last issued insn. */
7105 static rtx_insn *last_scheduled_insn;
7107 /* The following variable value is a pointer to a DFA state used as
7108 a temporary variable. */
7110 static state_t temp_dfa_state = NULL;
7112 /* The following variable value is DFA state after issuing the last
7113 insn. */
7115 static state_t prev_cycle_state = NULL;
7117 /* The following array element values are TRUE if the corresponding
7118 insn requires stop bits to be added before it. */
7120 static char *stops_p = NULL;
7122 /* The following variable is used to set up the array mentioned above. */
7124 static int stop_before_p = 0;
7126 /* The following variable value is the length of the arrays `clocks' and
7127 `add_cycles'. */
7129 static int clocks_length;
7131 /* The following variable value is the number of data speculations in progress. */
7132 static int pending_data_specs = 0;
7134 /* Number of memory references on current and three future processor cycles. */
7135 static char mem_ops_in_group[4];
7137 /* Number of current processor cycle (from scheduler's point of view). */
7138 static int current_cycle;
7140 static rtx ia64_single_set (rtx_insn *);
7141 static void ia64_emit_insn_before (rtx, rtx);
7143 /* Map a bundle number to its pseudo-op. */
7145 const char *
7146 get_bundle_name (int b)
7148 return bundle_name[b];
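/* For example, get_bundle_name (0) returns ".mii", get_bundle_name (2)
   returns ".mfi", and get_bundle_name (9) returns ".mlx".  */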
7152 /* Return the maximum number of instructions a cpu can issue. */
7154 static int
7155 ia64_issue_rate (void)
7157 return 6;
7160 /* Helper function - like single_set, but look inside COND_EXEC. */
7162 static rtx
7163 ia64_single_set (rtx_insn *insn)
7165 rtx x = PATTERN (insn), ret;
7166 if (GET_CODE (x) == COND_EXEC)
7167 x = COND_EXEC_CODE (x);
7168 if (GET_CODE (x) == SET)
7169 return x;
7171 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7172 Although they are not classical single set, the second set is there just
7173 to protect it from moving past FP-relative stack accesses. */
7174 switch (recog_memoized (insn))
7176 case CODE_FOR_prologue_allocate_stack:
7177 case CODE_FOR_prologue_allocate_stack_pr:
7178 case CODE_FOR_epilogue_deallocate_stack:
7179 case CODE_FOR_epilogue_deallocate_stack_pr:
7180 ret = XVECEXP (x, 0, 0);
7181 break;
7183 default:
7184 ret = single_set_2 (insn, x);
7185 break;
7188 return ret;
7191 /* Adjust the cost of a scheduling dependency.
7192 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7193 COST is the current cost, DW is dependency weakness. */
7194 static int
7195 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7196 int cost, dw_t dw)
7198 enum reg_note dep_type = (enum reg_note) dep_type1;
7199 enum attr_itanium_class dep_class;
7200 enum attr_itanium_class insn_class;
7202 insn_class = ia64_safe_itanium_class (insn);
7203 dep_class = ia64_safe_itanium_class (dep_insn);
7205 /* Treat true memory dependencies separately. Ignore apparent true
7206 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7207 if (dep_type == REG_DEP_TRUE
7208 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7209 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7210 return 0;
7212 if (dw == MIN_DEP_WEAK)
7213 /* Store and load are likely to alias, use higher cost to avoid stall. */
7214 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7215 else if (dw > MIN_DEP_WEAK)
7217 /* Store and load are less likely to alias. */
7218 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7219 /* Assume there will be no cache conflict for floating-point data.
7220 For integer data, L1 conflict penalty is huge (17 cycles), so we
7221 never assume it will not cause a conflict. */
7222 return 0;
7223 else
7224 return cost;
7227 if (dep_type != REG_DEP_OUTPUT)
7228 return cost;
7230 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7231 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7232 return 0;
7234 return cost;
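/* For instance, an apparent true dependence from a store to a branch or
   call is given cost 0, a store followed by a load that is likely to alias
   (dw == MIN_DEP_WEAK) is charged PARAM_SCHED_MEM_TRUE_DEP_COST, and an
   output dependence with a store on either side costs 0.  */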
7237 /* Like emit_insn_before, but skip cycle_display notes.
7238 ??? When cycle display notes are implemented, update this. */
7240 static void
7241 ia64_emit_insn_before (rtx insn, rtx before)
7243 emit_insn_before (insn, before);
7246 /* The following function marks insns that produce addresses for load
7247 and store insns. Such insns will be placed into M slots because this
7248 decreases latency for Itanium 1 (see function
7249 `ia64_produce_address_p' and the DFA descriptions). */
7251 static void
7252 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7254 rtx_insn *insn, *next, *next_tail;
7256 /* Before reload, which_alternative is not set, which means that
7257 ia64_safe_itanium_class will produce wrong results for (at least)
7258 move instructions. */
7259 if (!reload_completed)
7260 return;
7262 next_tail = NEXT_INSN (tail);
7263 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7264 if (INSN_P (insn))
7265 insn->call = 0;
7266 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7267 if (INSN_P (insn)
7268 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7270 sd_iterator_def sd_it;
7271 dep_t dep;
7272 bool has_mem_op_consumer_p = false;
7274 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7276 enum attr_itanium_class c;
7278 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7279 continue;
7281 next = DEP_CON (dep);
7282 c = ia64_safe_itanium_class (next);
7283 if ((c == ITANIUM_CLASS_ST
7284 || c == ITANIUM_CLASS_STF)
7285 && ia64_st_address_bypass_p (insn, next))
7287 has_mem_op_consumer_p = true;
7288 break;
7290 else if ((c == ITANIUM_CLASS_LD
7291 || c == ITANIUM_CLASS_FLD
7292 || c == ITANIUM_CLASS_FLDP)
7293 && ia64_ld_address_bypass_p (insn, next))
7295 has_mem_op_consumer_p = true;
7296 break;
7300 insn->call = has_mem_op_consumer_p;
7304 /* We're beginning a new block. Initialize data structures as necessary. */
7306 static void
7307 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7308 int sched_verbose ATTRIBUTE_UNUSED,
7309 int max_ready ATTRIBUTE_UNUSED)
7311 if (flag_checking && !sel_sched_p () && reload_completed)
7313 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7314 insn != current_sched_info->next_tail;
7315 insn = NEXT_INSN (insn))
7316 gcc_assert (!SCHED_GROUP_P (insn));
7318 last_scheduled_insn = NULL;
7319 init_insn_group_barriers ();
7321 current_cycle = 0;
7322 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7325 /* We're beginning a scheduling pass. Check assertion. */
7327 static void
7328 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7329 int sched_verbose ATTRIBUTE_UNUSED,
7330 int max_ready ATTRIBUTE_UNUSED)
7332 gcc_assert (pending_data_specs == 0);
7335 /* Scheduling pass is now finished. Free/reset static variable. */
7336 static void
7337 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7338 int sched_verbose ATTRIBUTE_UNUSED)
7340 gcc_assert (pending_data_specs == 0);
7343 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7344 speculation check), FALSE otherwise. */
7345 static bool
7346 is_load_p (rtx_insn *insn)
7348 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7350 return
7351 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7352 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7355 /* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7356 array (taking into account the 3-cycle cache reference postponement for
7357 stores: Intel Itanium 2 Reference Manual for Software Development and
7358 Optimization, 6.7.3.1). */
7359 static void
7360 record_memory_reference (rtx_insn *insn)
7362 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7364 switch (insn_class) {
7365 case ITANIUM_CLASS_FLD:
7366 case ITANIUM_CLASS_LD:
7367 mem_ops_in_group[current_cycle % 4]++;
7368 break;
7369 case ITANIUM_CLASS_STF:
7370 case ITANIUM_CLASS_ST:
7371 mem_ops_in_group[(current_cycle + 3) % 4]++;
7372 break;
7373 default:;
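/* Illustration: a load issued on cycle 5 is counted in
   mem_ops_in_group[5 % 4], i.e. slot 1, while a store issued on the same
   cycle is counted in mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, three
   cycles later, to model the postponed cache reference of stores.  */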
7377 /* We are about to begin issuing insns for this clock cycle.
7378 Override the default sort algorithm to better slot instructions. */
7380 static int
7381 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7382 int *pn_ready, int clock_var,
7383 int reorder_type)
7385 int n_asms;
7386 int n_ready = *pn_ready;
7387 rtx_insn **e_ready = ready + n_ready;
7388 rtx_insn **insnp;
7390 if (sched_verbose)
7391 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7393 if (reorder_type == 0)
7395 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7396 n_asms = 0;
7397 for (insnp = ready; insnp < e_ready; insnp++)
7398 if (insnp < e_ready)
7400 rtx_insn *insn = *insnp;
7401 enum attr_type t = ia64_safe_type (insn);
7402 if (t == TYPE_UNKNOWN)
7404 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7405 || asm_noperands (PATTERN (insn)) >= 0)
7407 rtx_insn *lowest = ready[n_asms];
7408 ready[n_asms] = insn;
7409 *insnp = lowest;
7410 n_asms++;
7412 else
7414 rtx_insn *highest = ready[n_ready - 1];
7415 ready[n_ready - 1] = insn;
7416 *insnp = highest;
7417 return 1;
7422 if (n_asms < n_ready)
7424 /* Some normal insns to process. Skip the asms. */
7425 ready += n_asms;
7426 n_ready -= n_asms;
7428 else if (n_ready > 0)
7429 return 1;
7432 if (ia64_final_schedule)
7434 int deleted = 0;
7435 int nr_need_stop = 0;
7437 for (insnp = ready; insnp < e_ready; insnp++)
7438 if (safe_group_barrier_needed (*insnp))
7439 nr_need_stop++;
7441 if (reorder_type == 1 && n_ready == nr_need_stop)
7442 return 0;
7443 if (reorder_type == 0)
7444 return 1;
7445 insnp = e_ready;
7446 /* Move down everything that needs a stop bit, preserving
7447 relative order. */
7448 while (insnp-- > ready + deleted)
7449 while (insnp >= ready + deleted)
7451 rtx_insn *insn = *insnp;
7452 if (! safe_group_barrier_needed (insn))
7453 break;
7454 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7455 *ready = insn;
7456 deleted++;
7458 n_ready -= deleted;
7459 ready += deleted;
7462 current_cycle = clock_var;
7463 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7465 int moved = 0;
7467 insnp = e_ready;
7468 /* Move down loads/stores, preserving relative order. */
7469 while (insnp-- > ready + moved)
7470 while (insnp >= ready + moved)
7472 rtx_insn *insn = *insnp;
7473 if (! is_load_p (insn))
7474 break;
7475 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7476 *ready = insn;
7477 moved++;
7479 n_ready -= moved;
7480 ready += moved;
7483 return 1;
7486 /* We are about to begin issuing insns for this clock cycle. Override
7487 the default sort algorithm to better slot instructions. */
7489 static int
7490 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7491 int *pn_ready, int clock_var)
7493 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7494 pn_ready, clock_var, 0);
7497 /* Like ia64_sched_reorder, but called after issuing each insn.
7498 Override the default sort algorithm to better slot instructions. */
7500 static int
7501 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7502 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7503 int *pn_ready, int clock_var)
7505 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7506 clock_var, 1);
7509 /* We are about to issue INSN. Return the number of insns left on the
7510 ready queue that can be issued this cycle. */
7512 static int
7513 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7514 int sched_verbose ATTRIBUTE_UNUSED,
7515 rtx_insn *insn,
7516 int can_issue_more ATTRIBUTE_UNUSED)
7518 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7519 /* Modulo scheduling does not extend h_i_d when emitting
7520 new instructions. Don't use h_i_d if we don't have to. */
7522 if (DONE_SPEC (insn) & BEGIN_DATA)
7523 pending_data_specs++;
7524 if (CHECK_SPEC (insn) & BEGIN_DATA)
7525 pending_data_specs--;
7528 if (DEBUG_INSN_P (insn))
7529 return 1;
7531 last_scheduled_insn = insn;
7532 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7533 if (reload_completed)
7535 int needed = group_barrier_needed (insn);
7537 gcc_assert (!needed);
7538 if (CALL_P (insn))
7539 init_insn_group_barriers ();
7540 stops_p [INSN_UID (insn)] = stop_before_p;
7541 stop_before_p = 0;
7543 record_memory_reference (insn);
7545 return 1;
7548 /* We are choosing insn from the ready queue. Return zero if INSN
7549 can be chosen. */
7551 static int
7552 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7554 gcc_assert (insn && INSN_P (insn));
7556 /* The size of the ALAT is 32. Since we perform conservative
7557 data speculation, we keep the ALAT half empty. */
7558 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7559 return ready_index == 0 ? -1 : 1;
7561 if (ready_index == 0)
7562 return 0;
7564 if ((!reload_completed
7565 || !safe_group_barrier_needed (insn))
7566 && (!mflag_sched_mem_insns_hard_limit
7567 || !is_load_p (insn)
7568 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7569 return 0;
7571 return 1;
7574 /* The following variable value is a pseudo-insn used by the DFA insn
7575 scheduler to change the DFA state when the simulated clock is
7576 increased. */
7578 static rtx_insn *dfa_pre_cycle_insn;
7580 /* Returns 1 when a meaningful insn was scheduled between the last group
7581 barrier and LAST. */
7582 static int
7583 scheduled_good_insn (rtx_insn *last)
7585 if (last && recog_memoized (last) >= 0)
7586 return 1;
7588 for ( ;
7589 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7590 && !stops_p[INSN_UID (last)];
7591 last = PREV_INSN (last))
7592 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7593 the ebb we're scheduling. */
7594 if (INSN_P (last) && recog_memoized (last) >= 0)
7595 return 1;
7597 return 0;
7600 /* We are about to begin issuing INSN. Return nonzero if we cannot
7601 issue it on given cycle CLOCK and return zero if we should not sort
7602 the ready queue on the next clock start. */
7604 static int
7605 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7606 int clock, int *sort_p)
7608 gcc_assert (insn && INSN_P (insn));
7610 if (DEBUG_INSN_P (insn))
7611 return 0;
7613 /* When a group barrier is needed for insn, last_scheduled_insn
7614 should be set. */
7615 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7616 || last_scheduled_insn);
7618 if ((reload_completed
7619 && (safe_group_barrier_needed (insn)
7620 || (mflag_sched_stop_bits_after_every_cycle
7621 && last_clock != clock
7622 && last_scheduled_insn
7623 && scheduled_good_insn (last_scheduled_insn))))
7624 || (last_scheduled_insn
7625 && (CALL_P (last_scheduled_insn)
7626 || unknown_for_bundling_p (last_scheduled_insn))))
7628 init_insn_group_barriers ();
7630 if (verbose && dump)
7631 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7632 last_clock == clock ? " + cycle advance" : "");
7634 stop_before_p = 1;
7635 current_cycle = clock;
7636 mem_ops_in_group[current_cycle % 4] = 0;
7638 if (last_clock == clock)
7640 state_transition (curr_state, dfa_stop_insn);
7641 if (TARGET_EARLY_STOP_BITS)
7642 *sort_p = (last_scheduled_insn == NULL_RTX
7643 || ! CALL_P (last_scheduled_insn));
7644 else
7645 *sort_p = 0;
7646 return 1;
7649 if (last_scheduled_insn)
7651 if (unknown_for_bundling_p (last_scheduled_insn))
7652 state_reset (curr_state);
7653 else
7655 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7656 state_transition (curr_state, dfa_stop_insn);
7657 state_transition (curr_state, dfa_pre_cycle_insn);
7658 state_transition (curr_state, NULL);
7662 return 0;
7665 /* Implement targetm.sched.h_i_d_extended hook.
7666 Extend internal data structures. */
7667 static void
7668 ia64_h_i_d_extended (void)
7670 if (stops_p != NULL)
7672 int new_clocks_length = get_max_uid () * 3 / 2;
7673 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7674 clocks_length = new_clocks_length;
7679 /* This structure describes the data used by the backend to guide scheduling.
7680 When the current scheduling point is switched, this data should be saved
7681 and restored later, if the scheduler returns to this point. */
7682 struct _ia64_sched_context
7684 state_t prev_cycle_state;
7685 rtx_insn *last_scheduled_insn;
7686 struct reg_write_state rws_sum[NUM_REGS];
7687 struct reg_write_state rws_insn[NUM_REGS];
7688 int first_instruction;
7689 int pending_data_specs;
7690 int current_cycle;
7691 char mem_ops_in_group[4];
7693 typedef struct _ia64_sched_context *ia64_sched_context_t;
7695 /* Allocates a scheduling context. */
7696 static void *
7697 ia64_alloc_sched_context (void)
7699 return xmalloc (sizeof (struct _ia64_sched_context));
7702 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7703 the global context otherwise. */
7704 static void
7705 ia64_init_sched_context (void *_sc, bool clean_p)
7707 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7709 sc->prev_cycle_state = xmalloc (dfa_state_size);
7710 if (clean_p)
7712 state_reset (sc->prev_cycle_state);
7713 sc->last_scheduled_insn = NULL;
7714 memset (sc->rws_sum, 0, sizeof (rws_sum));
7715 memset (sc->rws_insn, 0, sizeof (rws_insn));
7716 sc->first_instruction = 1;
7717 sc->pending_data_specs = 0;
7718 sc->current_cycle = 0;
7719 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7721 else
7723 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7724 sc->last_scheduled_insn = last_scheduled_insn;
7725 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7726 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7727 sc->first_instruction = first_instruction;
7728 sc->pending_data_specs = pending_data_specs;
7729 sc->current_cycle = current_cycle;
7730 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7734 /* Sets the global scheduling context to the one pointed to by _SC. */
7735 static void
7736 ia64_set_sched_context (void *_sc)
7738 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7740 gcc_assert (sc != NULL);
7742 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7743 last_scheduled_insn = sc->last_scheduled_insn;
7744 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7745 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7746 first_instruction = sc->first_instruction;
7747 pending_data_specs = sc->pending_data_specs;
7748 current_cycle = sc->current_cycle;
7749 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7752 /* Clears the data in the _SC scheduling context. */
7753 static void
7754 ia64_clear_sched_context (void *_sc)
7756 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7758 free (sc->prev_cycle_state);
7759 sc->prev_cycle_state = NULL;
7762 /* Frees the _SC scheduling context. */
7763 static void
7764 ia64_free_sched_context (void *_sc)
7766 gcc_assert (_sc != NULL);
7768 free (_sc);
7771 typedef rtx (* gen_func_t) (rtx, rtx);
7773 /* Return a function that will generate a load of mode MODE_NO
7774 with speculation types TS. */
7775 static gen_func_t
7776 get_spec_load_gen_function (ds_t ts, int mode_no)
7778 static gen_func_t gen_ld_[] = {
7779 gen_movbi,
7780 gen_movqi_internal,
7781 gen_movhi_internal,
7782 gen_movsi_internal,
7783 gen_movdi_internal,
7784 gen_movsf_internal,
7785 gen_movdf_internal,
7786 gen_movxf_internal,
7787 gen_movti_internal,
7788 gen_zero_extendqidi2,
7789 gen_zero_extendhidi2,
7790 gen_zero_extendsidi2,
7793 static gen_func_t gen_ld_a[] = {
7794 gen_movbi_advanced,
7795 gen_movqi_advanced,
7796 gen_movhi_advanced,
7797 gen_movsi_advanced,
7798 gen_movdi_advanced,
7799 gen_movsf_advanced,
7800 gen_movdf_advanced,
7801 gen_movxf_advanced,
7802 gen_movti_advanced,
7803 gen_zero_extendqidi2_advanced,
7804 gen_zero_extendhidi2_advanced,
7805 gen_zero_extendsidi2_advanced,
7807 static gen_func_t gen_ld_s[] = {
7808 gen_movbi_speculative,
7809 gen_movqi_speculative,
7810 gen_movhi_speculative,
7811 gen_movsi_speculative,
7812 gen_movdi_speculative,
7813 gen_movsf_speculative,
7814 gen_movdf_speculative,
7815 gen_movxf_speculative,
7816 gen_movti_speculative,
7817 gen_zero_extendqidi2_speculative,
7818 gen_zero_extendhidi2_speculative,
7819 gen_zero_extendsidi2_speculative,
7821 static gen_func_t gen_ld_sa[] = {
7822 gen_movbi_speculative_advanced,
7823 gen_movqi_speculative_advanced,
7824 gen_movhi_speculative_advanced,
7825 gen_movsi_speculative_advanced,
7826 gen_movdi_speculative_advanced,
7827 gen_movsf_speculative_advanced,
7828 gen_movdf_speculative_advanced,
7829 gen_movxf_speculative_advanced,
7830 gen_movti_speculative_advanced,
7831 gen_zero_extendqidi2_speculative_advanced,
7832 gen_zero_extendhidi2_speculative_advanced,
7833 gen_zero_extendsidi2_speculative_advanced,
7835 static gen_func_t gen_ld_s_a[] = {
7836 gen_movbi_speculative_a,
7837 gen_movqi_speculative_a,
7838 gen_movhi_speculative_a,
7839 gen_movsi_speculative_a,
7840 gen_movdi_speculative_a,
7841 gen_movsf_speculative_a,
7842 gen_movdf_speculative_a,
7843 gen_movxf_speculative_a,
7844 gen_movti_speculative_a,
7845 gen_zero_extendqidi2_speculative_a,
7846 gen_zero_extendhidi2_speculative_a,
7847 gen_zero_extendsidi2_speculative_a,
7850 gen_func_t *gen_ld;
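/* Select the pattern table from the requested speculation types:
   data speculation alone uses the advanced (ld.a) patterns, control
   speculation uses the speculative (ld.s or ld.s.a) patterns, and
   both together use the speculative advanced (ld.sa) patterns.  */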
7852 if (ts & BEGIN_DATA)
7854 if (ts & BEGIN_CONTROL)
7855 gen_ld = gen_ld_sa;
7856 else
7857 gen_ld = gen_ld_a;
7859 else if (ts & BEGIN_CONTROL)
7861 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7862 || ia64_needs_block_p (ts))
7863 gen_ld = gen_ld_s;
7864 else
7865 gen_ld = gen_ld_s_a;
7867 else if (ts == 0)
7868 gen_ld = gen_ld_;
7869 else
7870 gcc_unreachable ();
7872 return gen_ld[mode_no];
7875 /* Constants that help map 'machine_mode' to an int. */
7876 enum SPEC_MODES
7878 SPEC_MODE_INVALID = -1,
7879 SPEC_MODE_FIRST = 0,
7880 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7881 SPEC_MODE_FOR_EXTEND_LAST = 3,
7882 SPEC_MODE_LAST = 8
7885 enum
7887 /* Offset to reach ZERO_EXTEND patterns. */
7888 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7891 /* Return index of the MODE. */
7892 static int
7893 ia64_mode_to_int (machine_mode mode)
7895 switch (mode)
7897 case BImode: return 0; /* SPEC_MODE_FIRST */
7898 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7899 case HImode: return 2;
7900 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7901 case DImode: return 4;
7902 case SFmode: return 5;
7903 case DFmode: return 6;
7904 case XFmode: return 7;
7905 case TImode:
7906 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7907 mentioned in itanium[12].md. Predicate fp_register_operand also
7908 needs to be defined. Bottom line: better disable for now. */
7909 return SPEC_MODE_INVALID;
7910 default: return SPEC_MODE_INVALID;
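/* For example, a QImode load combined with ZERO_EXTEND maps to
   index 1 + SPEC_GEN_EXTEND_OFFSET == 9, which selects the
   gen_zero_extendqidi2* entries in the pattern tables above.  */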
7914 /* Provide information about speculation capabilities. */
7915 static void
7916 ia64_set_sched_flags (spec_info_t spec_info)
7918 unsigned int *flags = &(current_sched_info->flags);
7920 if (*flags & SCHED_RGN
7921 || *flags & SCHED_EBB
7922 || *flags & SEL_SCHED)
7924 int mask = 0;
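/* The -msched-br-* options control speculation before reload and
   the -msched-ar-* options control it after reload, as the
   reload_completed tests below show.  */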
7926 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7927 || (mflag_sched_ar_data_spec && reload_completed))
7929 mask |= BEGIN_DATA;
7931 if (!sel_sched_p ()
7932 && ((mflag_sched_br_in_data_spec && !reload_completed)
7933 || (mflag_sched_ar_in_data_spec && reload_completed)))
7934 mask |= BE_IN_DATA;
7937 if (mflag_sched_control_spec
7938 && (!sel_sched_p ()
7939 || reload_completed))
7941 mask |= BEGIN_CONTROL;
7943 if (!sel_sched_p () && mflag_sched_in_control_spec)
7944 mask |= BE_IN_CONTROL;
7947 spec_info->mask = mask;
7949 if (mask)
7951 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7953 if (mask & BE_IN_SPEC)
7954 *flags |= NEW_BBS;
7956 spec_info->flags = 0;
7958 if ((mask & CONTROL_SPEC)
7959 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7960 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7962 if (sched_verbose >= 1)
7963 spec_info->dump = sched_dump;
7964 else
7965 spec_info->dump = 0;
7967 if (mflag_sched_count_spec_in_critical_path)
7968 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7971 else
7972 spec_info->mask = 0;
7975 /* If INSN is an appropriate load, return the index of its mode.
7976 Return -1 otherwise. */
7977 static int
7978 get_mode_no_for_insn (rtx_insn *insn)
7980 rtx reg, mem, mode_rtx;
7981 int mode_no;
7982 bool extend_p;
7984 extract_insn_cached (insn);
7986 /* We use WHICH_ALTERNATIVE only after reload. This will
7987 guarantee that reload won't touch a speculative insn. */
7989 if (recog_data.n_operands != 2)
7990 return -1;
7992 reg = recog_data.operand[0];
7993 mem = recog_data.operand[1];
7995 /* We should use MEM's mode since REG's mode in presence of
7996 ZERO_EXTEND will always be DImode. */
7997 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7998 /* Process non-speculative ld. */
8000 if (!reload_completed)
8002 /* Do not speculate into regs like ar.lc. */
8003 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8004 return -1;
8006 if (!MEM_P (mem))
8007 return -1;
8010 rtx mem_reg = XEXP (mem, 0);
8012 if (!REG_P (mem_reg))
8013 return -1;
8016 mode_rtx = mem;
8018 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8020 gcc_assert (REG_P (reg) && MEM_P (mem));
8021 mode_rtx = mem;
8023 else
8024 return -1;
8026 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8027 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8028 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8029 /* Process speculative ld or ld.c. */
8031 gcc_assert (REG_P (reg) && MEM_P (mem));
8032 mode_rtx = mem;
8034 else
8036 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8038 if (attr_class == ITANIUM_CLASS_CHK_A
8039 || attr_class == ITANIUM_CLASS_CHK_S_I
8040 || attr_class == ITANIUM_CLASS_CHK_S_F)
8041 /* Process chk. */
8042 mode_rtx = reg;
8043 else
8044 return -1;
8047 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8049 if (mode_no == SPEC_MODE_INVALID)
8050 return -1;
8052 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8054 if (extend_p)
8056 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8057 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8058 return -1;
8060 mode_no += SPEC_GEN_EXTEND_OFFSET;
8063 return mode_no;
8066 /* If X is an unspec part of a speculative load, return its code.
8067 Return -1 otherwise. */
8068 static int
8069 get_spec_unspec_code (const_rtx x)
8071 if (GET_CODE (x) != UNSPEC)
8072 return -1;
8075 int code;
8077 code = XINT (x, 1);
8079 switch (code)
8081 case UNSPEC_LDA:
8082 case UNSPEC_LDS:
8083 case UNSPEC_LDS_A:
8084 case UNSPEC_LDSA:
8085 return code;
8087 default:
8088 return -1;
8093 /* Implement skip_rtx_p hook. */
8094 static bool
8095 ia64_skip_rtx_p (const_rtx x)
8097 return get_spec_unspec_code (x) != -1;
8100 /* If INSN is a speculative load, return its UNSPEC code.
8101 Return -1 otherwise. */
8102 static int
8103 get_insn_spec_code (const_rtx insn)
8105 rtx pat, reg, mem;
8107 pat = PATTERN (insn);
8109 if (GET_CODE (pat) == COND_EXEC)
8110 pat = COND_EXEC_CODE (pat);
8112 if (GET_CODE (pat) != SET)
8113 return -1;
8115 reg = SET_DEST (pat);
8116 if (!REG_P (reg))
8117 return -1;
8119 mem = SET_SRC (pat);
8120 if (GET_CODE (mem) == ZERO_EXTEND)
8121 mem = XEXP (mem, 0);
8123 return get_spec_unspec_code (mem);
8126 /* If INSN is a speculative load, return a ds with the speculation types.
8127 Otherwise [if INSN is a normal instruction] return 0. */
8128 static ds_t
8129 ia64_get_insn_spec_ds (rtx_insn *insn)
8131 int code = get_insn_spec_code (insn);
8133 switch (code)
8135 case UNSPEC_LDA:
8136 return BEGIN_DATA;
8138 case UNSPEC_LDS:
8139 case UNSPEC_LDS_A:
8140 return BEGIN_CONTROL;
8142 case UNSPEC_LDSA:
8143 return BEGIN_DATA | BEGIN_CONTROL;
8145 default:
8146 return 0;
8150 /* If INSN is a speculative load return a ds with the speculation types that
8151 will be checked.
8152 Otherwise [if INSN is a normal instruction] return 0. */
8153 static ds_t
8154 ia64_get_insn_checked_ds (rtx_insn *insn)
8156 int code = get_insn_spec_code (insn);
8158 switch (code)
8160 case UNSPEC_LDA:
8161 return BEGIN_DATA | BEGIN_CONTROL;
8163 case UNSPEC_LDS:
8164 return BEGIN_CONTROL;
8166 case UNSPEC_LDS_A:
8167 case UNSPEC_LDSA:
8168 return BEGIN_DATA | BEGIN_CONTROL;
8170 default:
8171 return 0;
8175 /* Return the speculative load pattern for INSN with speculation
8176 types TS and machine mode index MODE_NO.  The operands of the new
8177 pattern are taken from the already-extracted recog_data for INSN,
8178 including any ZERO_EXTEND handled via the extended mode indices. */
8179 static rtx
8180 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8182 rtx pat, new_pat;
8183 gen_func_t gen_load;
8185 gen_load = get_spec_load_gen_function (ts, mode_no);
8187 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8188 copy_rtx (recog_data.operand[1]));
8190 pat = PATTERN (insn);
8191 if (GET_CODE (pat) == COND_EXEC)
8192 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8193 new_pat);
8195 return new_pat;
8198 static bool
8199 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8200 ds_t ds ATTRIBUTE_UNUSED)
8202 return false;
8205 /* Implement targetm.sched.speculate_insn hook.
8206 Check if the INSN can be TS speculative.
8207 If 'no' - return -1.
8208 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8209 If current pattern of the INSN already provides TS speculation,
8210 return 0. */
8211 static int
8212 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8214 int mode_no;
8215 int res;
8217 gcc_assert (!(ts & ~SPECULATIVE));
8219 if (ia64_spec_check_p (insn))
8220 return -1;
8222 if ((ts & BE_IN_SPEC)
8223 && !insn_can_be_in_speculative_p (insn, ts))
8224 return -1;
8226 mode_no = get_mode_no_for_insn (insn);
8228 if (mode_no != SPEC_MODE_INVALID)
8230 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8231 res = 0;
8232 else
8234 res = 1;
8235 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8238 else
8239 res = -1;
8241 return res;
8244 /* Return a function that will generate a check for speculation TS with mode
8245 MODE_NO.
8246 If simple check is needed, pass true for SIMPLE_CHECK_P.
8247 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8248 static gen_func_t
8249 get_spec_check_gen_function (ds_t ts, int mode_no,
8250 bool simple_check_p, bool clearing_check_p)
8252 static gen_func_t gen_ld_c_clr[] = {
8253 gen_movbi_clr,
8254 gen_movqi_clr,
8255 gen_movhi_clr,
8256 gen_movsi_clr,
8257 gen_movdi_clr,
8258 gen_movsf_clr,
8259 gen_movdf_clr,
8260 gen_movxf_clr,
8261 gen_movti_clr,
8262 gen_zero_extendqidi2_clr,
8263 gen_zero_extendhidi2_clr,
8264 gen_zero_extendsidi2_clr,
8266 static gen_func_t gen_ld_c_nc[] = {
8267 gen_movbi_nc,
8268 gen_movqi_nc,
8269 gen_movhi_nc,
8270 gen_movsi_nc,
8271 gen_movdi_nc,
8272 gen_movsf_nc,
8273 gen_movdf_nc,
8274 gen_movxf_nc,
8275 gen_movti_nc,
8276 gen_zero_extendqidi2_nc,
8277 gen_zero_extendhidi2_nc,
8278 gen_zero_extendsidi2_nc,
8280 static gen_func_t gen_chk_a_clr[] = {
8281 gen_advanced_load_check_clr_bi,
8282 gen_advanced_load_check_clr_qi,
8283 gen_advanced_load_check_clr_hi,
8284 gen_advanced_load_check_clr_si,
8285 gen_advanced_load_check_clr_di,
8286 gen_advanced_load_check_clr_sf,
8287 gen_advanced_load_check_clr_df,
8288 gen_advanced_load_check_clr_xf,
8289 gen_advanced_load_check_clr_ti,
8290 gen_advanced_load_check_clr_di,
8291 gen_advanced_load_check_clr_di,
8292 gen_advanced_load_check_clr_di,
8294 static gen_func_t gen_chk_a_nc[] = {
8295 gen_advanced_load_check_nc_bi,
8296 gen_advanced_load_check_nc_qi,
8297 gen_advanced_load_check_nc_hi,
8298 gen_advanced_load_check_nc_si,
8299 gen_advanced_load_check_nc_di,
8300 gen_advanced_load_check_nc_sf,
8301 gen_advanced_load_check_nc_df,
8302 gen_advanced_load_check_nc_xf,
8303 gen_advanced_load_check_nc_ti,
8304 gen_advanced_load_check_nc_di,
8305 gen_advanced_load_check_nc_di,
8306 gen_advanced_load_check_nc_di,
8308 static gen_func_t gen_chk_s[] = {
8309 gen_speculation_check_bi,
8310 gen_speculation_check_qi,
8311 gen_speculation_check_hi,
8312 gen_speculation_check_si,
8313 gen_speculation_check_di,
8314 gen_speculation_check_sf,
8315 gen_speculation_check_df,
8316 gen_speculation_check_xf,
8317 gen_speculation_check_ti,
8318 gen_speculation_check_di,
8319 gen_speculation_check_di,
8320 gen_speculation_check_di,
8323 gen_func_t *gen_check;
8325 if (ts & BEGIN_DATA)
8327 /* We don't need recovery because, even if this is ld.sa, the
8328 ALAT entry will be allocated only if the NAT bit is set to zero.
8329 So it is enough to use ld.c here. */
8331 if (simple_check_p)
8333 gcc_assert (mflag_sched_spec_ldc);
8335 if (clearing_check_p)
8336 gen_check = gen_ld_c_clr;
8337 else
8338 gen_check = gen_ld_c_nc;
8340 else
8342 if (clearing_check_p)
8343 gen_check = gen_chk_a_clr;
8344 else
8345 gen_check = gen_chk_a_nc;
8348 else if (ts & BEGIN_CONTROL)
8350 if (simple_check_p)
8351 /* We might want to use ld.sa -> ld.c instead of
8352 ld.s -> chk.s. */
8354 gcc_assert (!ia64_needs_block_p (ts));
8356 if (clearing_check_p)
8357 gen_check = gen_ld_c_clr;
8358 else
8359 gen_check = gen_ld_c_nc;
8361 else
8363 gen_check = gen_chk_s;
8366 else
8367 gcc_unreachable ();
8369 gcc_assert (mode_no >= 0);
8370 return gen_check[mode_no];
8373 /* Return true if speculation of type TS needs a branchy recovery check. */
8374 static bool
8375 ia64_needs_block_p (ds_t ts)
8377 if (ts & BEGIN_DATA)
8378 return !mflag_sched_spec_ldc;
8380 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8382 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8385 /* Generate (or regenerate) a recovery check for INSN. */
8386 static rtx
8387 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8389 rtx op1, pat, check_pat;
8390 gen_func_t gen_check;
8391 int mode_no;
8393 mode_no = get_mode_no_for_insn (insn);
8394 gcc_assert (mode_no >= 0);
8396 if (label)
8397 op1 = label;
8398 else
8400 gcc_assert (!ia64_needs_block_p (ds));
8401 op1 = copy_rtx (recog_data.operand[1]);
8404 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8405 true);
8407 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8409 pat = PATTERN (insn);
8410 if (GET_CODE (pat) == COND_EXEC)
8411 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8412 check_pat);
8414 return check_pat;
8417 /* Return nonzero if X is a branchy recovery check. */
8418 static int
8419 ia64_spec_check_p (rtx x)
8421 x = PATTERN (x);
8422 if (GET_CODE (x) == COND_EXEC)
8423 x = COND_EXEC_CODE (x);
8424 if (GET_CODE (x) == SET)
8425 return ia64_spec_check_src_p (SET_SRC (x));
8426 return 0;
8429 /* Return nonzero if SRC belongs to a recovery check. */
8430 static int
8431 ia64_spec_check_src_p (rtx src)
8433 if (GET_CODE (src) == IF_THEN_ELSE)
8435 rtx t;
8437 t = XEXP (src, 0);
8438 if (GET_CODE (t) == NE)
8440 t = XEXP (t, 0);
8442 if (GET_CODE (t) == UNSPEC)
8444 int code;
8446 code = XINT (t, 1);
8448 if (code == UNSPEC_LDCCLR
8449 || code == UNSPEC_LDCNC
8450 || code == UNSPEC_CHKACLR
8451 || code == UNSPEC_CHKANC
8452 || code == UNSPEC_CHKS)
8454 gcc_assert (code != 0);
8455 return code;
8460 return 0;
8464 /* The following page contains abstract data `bundle states' which are
8465 used for bundling insns (inserting nops and template generation). */
8467 /* The following describes state of insn bundling. */
8469 struct bundle_state
8471 /* Unique bundle state number to identify them in the debugging
8472 output */
8473 int unique_num;
8474 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8475 /* number of nops before and after the insn */
8476 short before_nops_num, after_nops_num;
8477 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8478 insn) */
8479 int cost; /* cost of the state in cycles */
8480 int accumulated_insns_num; /* number of all previous insns including
8481 nops. An L insn is counted as 2 insns */
8482 int branch_deviation; /* deviation of previous branches from 3rd slots */
8483 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8484 struct bundle_state *next; /* next state with the same insn_num */
8485 struct bundle_state *originator; /* originator (previous insn state) */
8486 /* All bundle states are in the following chain. */
8487 struct bundle_state *allocated_states_chain;
8488 /* The DFA State after issuing the insn and the nops. */
8489 state_t dfa_state;
8492 /* The following maps an insn number to the corresponding bundle state. */
8494 static struct bundle_state **index_to_bundle_states;
8496 /* The unique number of next bundle state. */
8498 static int bundle_states_num;
8500 /* All allocated bundle states are in the following chain. */
8502 static struct bundle_state *allocated_bundle_states_chain;
8504 /* All allocated but not used bundle states are in the following
8505 chain. */
8507 static struct bundle_state *free_bundle_state_chain;
8510 /* The following function returns a free bundle state. */
8512 static struct bundle_state *
8513 get_free_bundle_state (void)
8515 struct bundle_state *result;
8517 if (free_bundle_state_chain != NULL)
8519 result = free_bundle_state_chain;
8520 free_bundle_state_chain = result->next;
8522 else
8524 result = XNEW (struct bundle_state);
8525 result->dfa_state = xmalloc (dfa_state_size);
8526 result->allocated_states_chain = allocated_bundle_states_chain;
8527 allocated_bundle_states_chain = result;
8529 result->unique_num = bundle_states_num++;
8530 return result;
8534 /* The following function frees given bundle state. */
8536 static void
8537 free_bundle_state (struct bundle_state *state)
8539 state->next = free_bundle_state_chain;
8540 free_bundle_state_chain = state;
8543 /* Start work with abstract data `bundle states'. */
8545 static void
8546 initiate_bundle_states (void)
8548 bundle_states_num = 0;
8549 free_bundle_state_chain = NULL;
8550 allocated_bundle_states_chain = NULL;
8553 /* Finish work with abstract data `bundle states'. */
8555 static void
8556 finish_bundle_states (void)
8558 struct bundle_state *curr_state, *next_state;
8560 for (curr_state = allocated_bundle_states_chain;
8561 curr_state != NULL;
8562 curr_state = next_state)
8564 next_state = curr_state->allocated_states_chain;
8565 free (curr_state->dfa_state);
8566 free (curr_state);
8570 /* Hashtable helpers. */
8572 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8574 static inline hashval_t hash (const bundle_state *);
8575 static inline bool equal (const bundle_state *, const bundle_state *);
8578 /* The function returns hash of BUNDLE_STATE. */
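/* It folds every byte of the DFA state into the result and adds the
   insn number, mirroring the equality test below.  */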
8580 inline hashval_t
8581 bundle_state_hasher::hash (const bundle_state *state)
8583 unsigned result, i;
8585 for (result = i = 0; i < dfa_state_size; i++)
8586 result += (((unsigned char *) state->dfa_state) [i]
8587 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8588 return result + state->insn_num;
8591 /* The function returns nonzero if the bundle state keys are equal. */
8593 inline bool
8594 bundle_state_hasher::equal (const bundle_state *state1,
8595 const bundle_state *state2)
8597 return (state1->insn_num == state2->insn_num
8598 && memcmp (state1->dfa_state, state2->dfa_state,
8599 dfa_state_size) == 0);
8602 /* Hash table of the bundle states. The key is dfa_state and insn_num
8603 of the bundle states. */
8605 static hash_table<bundle_state_hasher> *bundle_state_table;
8607 /* The function inserts the BUNDLE_STATE into the hash table. The
8608 function returns nonzero if the bundle has been inserted into the
8609 table. The table contains the best bundle state with given key. */
8611 static int
8612 insert_bundle_state (struct bundle_state *bundle_state)
8614 struct bundle_state **entry_ptr;
8616 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8617 if (*entry_ptr == NULL)
8619 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8620 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8621 *entry_ptr = bundle_state;
8622 return TRUE;
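/* Otherwise a state with the same (DFA state, insn number) key
   already exists; replace it only if the new state is better in the
   lexicographic order (cost, accumulated insns, branch deviation,
   middle-of-bundle stops).  */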
8624 else if (bundle_state->cost < (*entry_ptr)->cost
8625 || (bundle_state->cost == (*entry_ptr)->cost
8626 && ((*entry_ptr)->accumulated_insns_num
8627 > bundle_state->accumulated_insns_num
8628 || ((*entry_ptr)->accumulated_insns_num
8629 == bundle_state->accumulated_insns_num
8630 && ((*entry_ptr)->branch_deviation
8631 > bundle_state->branch_deviation
8632 || ((*entry_ptr)->branch_deviation
8633 == bundle_state->branch_deviation
8634 && (*entry_ptr)->middle_bundle_stops
8635 > bundle_state->middle_bundle_stops))))))
8638 struct bundle_state temp;
8640 temp = **entry_ptr;
8641 **entry_ptr = *bundle_state;
8642 (*entry_ptr)->next = temp.next;
8643 *bundle_state = temp;
8645 return FALSE;
8648 /* Start work with the hash table. */
8650 static void
8651 initiate_bundle_state_table (void)
8653 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8656 /* Finish work with the hash table. */
8658 static void
8659 finish_bundle_state_table (void)
8661 delete bundle_state_table;
8662 bundle_state_table = NULL;
8667 /* The following variable is an insn `nop' used to check bundle states
8668 with different number of inserted nops. */
8670 static rtx_insn *ia64_nop;
8672 /* The following function tries to issue NOPS_NUM nops for the current
8673 state without advancing processor cycle. If it failed, the
8674 function returns FALSE and frees the current state. */
8676 static int
8677 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8679 int i;
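/* state_transition returns a negative value when the insn can be
   issued in the current state without advancing the cycle; a
   nonnegative result therefore counts as failure here.  */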
8681 for (i = 0; i < nops_num; i++)
8682 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8684 free_bundle_state (curr_state);
8685 return FALSE;
8687 return TRUE;
8690 /* The following function tries to issue INSN for the current
8691 state without advancing processor cycle. If it failed, the
8692 function returns FALSE and frees the current state. */
8694 static int
8695 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8697 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8699 free_bundle_state (curr_state);
8700 return FALSE;
8702 return TRUE;
8705 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8706 starting with ORIGINATOR without advancing processor cycle. If
8707 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8708 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8709 If it was successful, the function creates a new bundle state and
8710 inserts it into the hash table and into `index_to_bundle_states'. */
8712 static void
8713 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8714 rtx_insn *insn, int try_bundle_end_p,
8715 int only_bundle_end_p)
8717 struct bundle_state *curr_state;
8719 curr_state = get_free_bundle_state ();
8720 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8721 curr_state->insn = insn;
8722 curr_state->insn_num = originator->insn_num + 1;
8723 curr_state->cost = originator->cost;
8724 curr_state->originator = originator;
8725 curr_state->before_nops_num = before_nops_num;
8726 curr_state->after_nops_num = 0;
8727 curr_state->accumulated_insns_num
8728 = originator->accumulated_insns_num + before_nops_num;
8729 curr_state->branch_deviation = originator->branch_deviation;
8730 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8731 gcc_assert (insn);
8732 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8734 gcc_assert (GET_MODE (insn) != TImode);
8735 if (!try_issue_nops (curr_state, before_nops_num))
8736 return;
8737 if (!try_issue_insn (curr_state, insn))
8738 return;
8739 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8740 if (curr_state->accumulated_insns_num % 3 != 0)
8741 curr_state->middle_bundle_stops++;
8742 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8743 && curr_state->accumulated_insns_num % 3 != 0)
8745 free_bundle_state (curr_state);
8746 return;
8749 else if (GET_MODE (insn) != TImode)
8751 if (!try_issue_nops (curr_state, before_nops_num))
8752 return;
8753 if (!try_issue_insn (curr_state, insn))
8754 return;
8755 curr_state->accumulated_insns_num++;
8756 gcc_assert (!unknown_for_bundling_p (insn));
8758 if (ia64_safe_type (insn) == TYPE_L)
8759 curr_state->accumulated_insns_num++;
8761 else
8763 /* If this is an insn that must be first in a group, then don't allow
8764 nops to be emitted before it. Currently, alloc is the only such
8765 supported instruction. */
8766 /* ??? The bundling automatons should handle this for us, but they do
8767 not yet have support for the first_insn attribute. */
8768 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8770 free_bundle_state (curr_state);
8771 return;
8774 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8775 state_transition (curr_state->dfa_state, NULL);
8776 curr_state->cost++;
8777 if (!try_issue_nops (curr_state, before_nops_num))
8778 return;
8779 if (!try_issue_insn (curr_state, insn))
8780 return;
8781 curr_state->accumulated_insns_num++;
8782 if (unknown_for_bundling_p (insn))
8784 /* Finish bundle containing asm insn. */
8785 curr_state->after_nops_num
8786 = 3 - curr_state->accumulated_insns_num % 3;
8787 curr_state->accumulated_insns_num
8788 += 3 - curr_state->accumulated_insns_num % 3;
8790 else if (ia64_safe_type (insn) == TYPE_L)
8791 curr_state->accumulated_insns_num++;
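/* A branch ideally occupies the 3rd slot of its bundle; record how
   far this one lands from that slot.  */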
8793 if (ia64_safe_type (insn) == TYPE_B)
8794 curr_state->branch_deviation
8795 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8796 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8798 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8800 state_t dfa_state;
8801 struct bundle_state *curr_state1;
8802 struct bundle_state *allocated_states_chain;
8804 curr_state1 = get_free_bundle_state ();
8805 dfa_state = curr_state1->dfa_state;
8806 allocated_states_chain = curr_state1->allocated_states_chain;
8807 *curr_state1 = *curr_state;
8808 curr_state1->dfa_state = dfa_state;
8809 curr_state1->allocated_states_chain = allocated_states_chain;
8810 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8811 dfa_state_size);
8812 curr_state = curr_state1;
8814 if (!try_issue_nops (curr_state,
8815 3 - curr_state->accumulated_insns_num % 3))
8816 return;
8817 curr_state->after_nops_num
8818 = 3 - curr_state->accumulated_insns_num % 3;
8819 curr_state->accumulated_insns_num
8820 += 3 - curr_state->accumulated_insns_num % 3;
8822 if (!insert_bundle_state (curr_state))
8823 free_bundle_state (curr_state);
8824 return;
8827 /* The following function returns the position in the two-bundle
8828 window for the given STATE. */
8830 static int
8831 get_max_pos (state_t state)
8833 if (cpu_unit_reservation_p (state, pos_6))
8834 return 6;
8835 else if (cpu_unit_reservation_p (state, pos_5))
8836 return 5;
8837 else if (cpu_unit_reservation_p (state, pos_4))
8838 return 4;
8839 else if (cpu_unit_reservation_p (state, pos_3))
8840 return 3;
8841 else if (cpu_unit_reservation_p (state, pos_2))
8842 return 2;
8843 else if (cpu_unit_reservation_p (state, pos_1))
8844 return 1;
8845 else
8846 return 0;
8849 /* The function returns the code of a possible template for the given
8850 position and state. The function should be called only with
8851 position equal to 3 or 6. We avoid generating F NOPs by putting
8852 templates containing F insns at the end of the template search,
8853 because of an undocumented anomaly in McKinley-derived cores which
8854 can cause stalls if an F-unit insn (including a NOP) is issued
8855 within a six-cycle window after reading certain application
8856 registers (such as ar.bsp). Furthermore, power considerations also
8857 argue against the use of F-unit instructions unless they're really needed. */
8859 static int
8860 get_template (state_t state, int pos)
8862 switch (pos)
8864 case 3:
8865 if (cpu_unit_reservation_p (state, _0mmi_))
8866 return 1;
8867 else if (cpu_unit_reservation_p (state, _0mii_))
8868 return 0;
8869 else if (cpu_unit_reservation_p (state, _0mmb_))
8870 return 7;
8871 else if (cpu_unit_reservation_p (state, _0mib_))
8872 return 6;
8873 else if (cpu_unit_reservation_p (state, _0mbb_))
8874 return 5;
8875 else if (cpu_unit_reservation_p (state, _0bbb_))
8876 return 4;
8877 else if (cpu_unit_reservation_p (state, _0mmf_))
8878 return 3;
8879 else if (cpu_unit_reservation_p (state, _0mfi_))
8880 return 2;
8881 else if (cpu_unit_reservation_p (state, _0mfb_))
8882 return 8;
8883 else if (cpu_unit_reservation_p (state, _0mlx_))
8884 return 9;
8885 else
8886 gcc_unreachable ();
8887 case 6:
8888 if (cpu_unit_reservation_p (state, _1mmi_))
8889 return 1;
8890 else if (cpu_unit_reservation_p (state, _1mii_))
8891 return 0;
8892 else if (cpu_unit_reservation_p (state, _1mmb_))
8893 return 7;
8894 else if (cpu_unit_reservation_p (state, _1mib_))
8895 return 6;
8896 else if (cpu_unit_reservation_p (state, _1mbb_))
8897 return 5;
8898 else if (cpu_unit_reservation_p (state, _1bbb_))
8899 return 4;
8900 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8901 return 3;
8902 else if (cpu_unit_reservation_p (state, _1mfi_))
8903 return 2;
8904 else if (cpu_unit_reservation_p (state, _1mfb_))
8905 return 8;
8906 else if (cpu_unit_reservation_p (state, _1mlx_))
8907 return 9;
8908 else
8909 gcc_unreachable ();
8910 default:
8911 gcc_unreachable ();
8915 /* True when INSN is important for bundling. */
8917 static bool
8918 important_for_bundling_p (rtx_insn *insn)
8920 return (INSN_P (insn)
8921 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8922 && GET_CODE (PATTERN (insn)) != USE
8923 && GET_CODE (PATTERN (insn)) != CLOBBER);
8926 /* The following function returns the first insn important for insn
8927 bundling at or after INSN and before TAIL. */
8929 static rtx_insn *
8930 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8932 for (; insn && insn != tail; insn = NEXT_INSN (insn))
8933 if (important_for_bundling_p (insn))
8934 return insn;
8935 return NULL;
8938 /* True when INSN is unknown, but important, for bundling. */
8940 static bool
8941 unknown_for_bundling_p (rtx_insn *insn)
8943 return (INSN_P (insn)
8944 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8945 && GET_CODE (PATTERN (insn)) != USE
8946 && GET_CODE (PATTERN (insn)) != CLOBBER);
8949 /* Add a bundle selector TEMPLATE0 before INSN. */
8951 static void
8952 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8954 rtx b = gen_bundle_selector (GEN_INT (template0));
8956 ia64_emit_insn_before (b, insn);
8957 #if NR_BUNDLES == 10
8958 if ((template0 == 4 || template0 == 5)
8959 && ia64_except_unwind_info (&global_options) == UI_TARGET)
8961 int i;
8962 rtx note = NULL_RTX;
8964 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8965 first or second slot. If it is and has a REG_EH_REGION note, copy it
8966 to following nops, as br.call sets rp to the address of following
8967 bundle and therefore an EH region end must be on a bundle
8968 boundary. */
8969 insn = PREV_INSN (insn);
8970 for (i = 0; i < 3; i++)
8973 insn = next_active_insn (insn);
8974 while (NONJUMP_INSN_P (insn)
8975 && get_attr_empty (insn) == EMPTY_YES);
8976 if (CALL_P (insn))
8977 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8978 else if (note)
8980 int code;
8982 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8983 || code == CODE_FOR_nop_b);
8984 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8985 note = NULL_RTX;
8986 else
8987 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8991 #endif
8994 /* The following function does insn bundling. Bundling means
8995 inserting templates and nop insns to fit insn groups into permitted
8996 templates. Instruction scheduling uses an NDFA (non-deterministic
8997 finite automaton) encoding information about the templates and the
8998 inserted nops. Nondeterminism of the automaton permits following
8999 all possible insn sequences very fast.
9001 Unfortunately it is not possible to get information about inserting
9002 nop insns and used templates from the automaton states. The
9003 automaton only says that we can issue an insn, possibly inserting
9004 some nops before it and using some template. Therefore insn
9005 bundling in this function is implemented by using a DFA
9006 (deterministic finite automaton). We follow all possible insn
9007 sequences by inserting 0-2 nops (that is what the NDFA describes for
9008 insn scheduling) before/after each insn being bundled. We know the
9009 start of a simulated processor cycle from insn scheduling (an insn
9010 starting a new cycle has TImode).
9012 A simple implementation of insn bundling would create an enormous
9013 number of possible insn sequences satisfying the information about
9014 new cycle ticks taken from the insn scheduling. To make the
9015 algorithm practical we use dynamic programming. Each decision
9016 (about inserting nops and implicitly about previous decisions) is
9017 described by the structure bundle_state (see above). If we generate
9018 the same bundle state (the key is the automaton state after issuing
9019 the insns and nops for it), we reuse the already generated one. As
9020 a consequence we reject some decisions which cannot improve the
9021 solution and reduce the memory needed by the algorithm.
9023 When we reach the end of EBB (extended basic block), we choose the
9024 best sequence and then, moving back in EBB, insert templates for
9025 the best alternative. The templates are taken from querying
9026 automaton state for each insn in chosen bundle states.
9028 So the algorithm makes two (forward and backward) passes through
9029 EBB. */
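/* Schematically (a sketch, not literal code), the forward pass is:

     for each important insn I in schedule order
       for each bundle state S recorded after the previous insn
         for nops in {2, 1, 0}  (2 only for F, B, L and S insns)
           try to issue the nops and then I from a copy of S;
           if the DFA accepts, record the new state keyed by
           (DFA state, insn number), keeping only the best one;

   and the backward pass walks the originator links of the best final
   state, emitting the recorded nops and bundle templates.  */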
9031 static void
9032 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9034 struct bundle_state *curr_state, *next_state, *best_state;
9035 rtx_insn *insn, *next_insn;
9036 int insn_num;
9037 int i, bundle_end_p, only_bundle_end_p, asm_p;
9038 int pos = 0, max_pos, template0, template1;
9039 rtx_insn *b;
9040 enum attr_type type;
9042 insn_num = 0;
9043 /* Count insns in the EBB. */
9044 for (insn = NEXT_INSN (prev_head_insn);
9045 insn && insn != tail;
9046 insn = NEXT_INSN (insn))
9047 if (INSN_P (insn))
9048 insn_num++;
9049 if (insn_num == 0)
9050 return;
9051 bundling_p = 1;
9052 dfa_clean_insn_cache ();
9053 initiate_bundle_state_table ();
9054 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9055 /* First (forward) pass -- generation of bundle states. */
9056 curr_state = get_free_bundle_state ();
9057 curr_state->insn = NULL;
9058 curr_state->before_nops_num = 0;
9059 curr_state->after_nops_num = 0;
9060 curr_state->insn_num = 0;
9061 curr_state->cost = 0;
9062 curr_state->accumulated_insns_num = 0;
9063 curr_state->branch_deviation = 0;
9064 curr_state->middle_bundle_stops = 0;
9065 curr_state->next = NULL;
9066 curr_state->originator = NULL;
9067 state_reset (curr_state->dfa_state);
9068 index_to_bundle_states [0] = curr_state;
9069 insn_num = 0;
9070 /* Shift cycle mark if it is put on insn which could be ignored. */
9071 for (insn = NEXT_INSN (prev_head_insn);
9072 insn != tail;
9073 insn = NEXT_INSN (insn))
9074 if (INSN_P (insn)
9075 && !important_for_bundling_p (insn)
9076 && GET_MODE (insn) == TImode)
9078 PUT_MODE (insn, VOIDmode);
9079 for (next_insn = NEXT_INSN (insn);
9080 next_insn != tail;
9081 next_insn = NEXT_INSN (next_insn))
9082 if (important_for_bundling_p (next_insn)
9083 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9085 PUT_MODE (next_insn, TImode);
9086 break;
9089 /* Forward pass: generation of bundle states. */
9090 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9091 insn != NULL_RTX;
9092 insn = next_insn)
9094 gcc_assert (important_for_bundling_p (insn));
9095 type = ia64_safe_type (insn);
9096 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9097 insn_num++;
9098 index_to_bundle_states [insn_num] = NULL;
9099 for (curr_state = index_to_bundle_states [insn_num - 1];
9100 curr_state != NULL;
9101 curr_state = next_state)
9103 pos = curr_state->accumulated_insns_num % 3;
9104 next_state = curr_state->next;
9105 /* We must fill up the current bundle in order to start a
9106 subsequent asm insn in a new bundle. Asm insn is always
9107 placed in a separate bundle. */
9108 only_bundle_end_p
9109 = (next_insn != NULL_RTX
9110 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9111 && unknown_for_bundling_p (next_insn));
9112 /* We may fill up the current bundle if it is the cycle end
9113 without a group barrier. */
9114 bundle_end_p
9115 = (only_bundle_end_p || next_insn == NULL_RTX
9116 || (GET_MODE (next_insn) == TImode
9117 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9118 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9119 || type == TYPE_S)
9120 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9121 only_bundle_end_p);
9122 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9123 only_bundle_end_p);
9124 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9125 only_bundle_end_p);
9127 gcc_assert (index_to_bundle_states [insn_num]);
9128 for (curr_state = index_to_bundle_states [insn_num];
9129 curr_state != NULL;
9130 curr_state = curr_state->next)
9131 if (verbose >= 2 && dump)
9133 /* This structure is taken from generated code of the
9134 pipeline hazard recognizer (see file insn-attrtab.c).
9135 Please don't forget to change the structure if a new
9136 automaton is added to .md file. */
9137 struct DFA_chip
9139 unsigned short one_automaton_state;
9140 unsigned short oneb_automaton_state;
9141 unsigned short two_automaton_state;
9142 unsigned short twob_automaton_state;
9145 fprintf
9146 (dump,
9147 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9148 curr_state->unique_num,
9149 (curr_state->originator == NULL
9150 ? -1 : curr_state->originator->unique_num),
9151 curr_state->cost,
9152 curr_state->before_nops_num, curr_state->after_nops_num,
9153 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9154 curr_state->middle_bundle_stops,
9155 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9156 INSN_UID (insn));
9160 /* We should find a solution because the 2nd insn scheduling has
9161 found one. */
9162 gcc_assert (index_to_bundle_states [insn_num]);
9163 /* Find a state corresponding to the best insn sequence. */
9164 best_state = NULL;
9165 for (curr_state = index_to_bundle_states [insn_num];
9166 curr_state != NULL;
9167 curr_state = curr_state->next)
9168 /* We are just looking at the states with fully filled up last
9169 bundle. First we prefer insn sequences with minimal cost,
9170 then those with minimal inserted nops, and finally those with
9171 branch insns placed in the 3rd slots. */
9172 if (curr_state->accumulated_insns_num % 3 == 0
9173 && (best_state == NULL || best_state->cost > curr_state->cost
9174 || (best_state->cost == curr_state->cost
9175 && (curr_state->accumulated_insns_num
9176 < best_state->accumulated_insns_num
9177 || (curr_state->accumulated_insns_num
9178 == best_state->accumulated_insns_num
9179 && (curr_state->branch_deviation
9180 < best_state->branch_deviation
9181 || (curr_state->branch_deviation
9182 == best_state->branch_deviation
9183 && curr_state->middle_bundle_stops
9184 < best_state->middle_bundle_stops)))))))
9185 best_state = curr_state;
9186 /* Second (backward) pass: adding nops and templates. */
9187 gcc_assert (best_state);
9188 insn_num = best_state->before_nops_num;
9189 template0 = template1 = -1;
9190 for (curr_state = best_state;
9191 curr_state->originator != NULL;
9192 curr_state = curr_state->originator)
9194 insn = curr_state->insn;
9195 asm_p = unknown_for_bundling_p (insn);
9196 insn_num++;
9197 if (verbose >= 2 && dump)
9199 struct DFA_chip
9201 unsigned short one_automaton_state;
9202 unsigned short oneb_automaton_state;
9203 unsigned short two_automaton_state;
9204 unsigned short twob_automaton_state;
9207 fprintf
9208 (dump,
9209 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9210 curr_state->unique_num,
9211 (curr_state->originator == NULL
9212 ? -1 : curr_state->originator->unique_num),
9213 curr_state->cost,
9214 curr_state->before_nops_num, curr_state->after_nops_num,
9215 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9216 curr_state->middle_bundle_stops,
9217 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9218 INSN_UID (insn));
9220 /* Find the position in the current bundle window. The window can
9221 contain at most two bundles. A two-bundle window means that
9222 the processor will make two bundle rotations. */
9223 max_pos = get_max_pos (curr_state->dfa_state);
9224 if (max_pos == 6
9225 /* The following (negative template number) means that the
9226 processor did one bundle rotation. */
9227 || (max_pos == 3 && template0 < 0))
9229 /* We are at the end of the window -- find template(s) for
9230 its bundle(s). */
9231 pos = max_pos;
9232 if (max_pos == 3)
9233 template0 = get_template (curr_state->dfa_state, 3);
9234 else
9236 template1 = get_template (curr_state->dfa_state, 3);
9237 template0 = get_template (curr_state->dfa_state, 6);
9240 if (max_pos > 3 && template1 < 0)
9241 /* It may happen when we have the stop inside a bundle. */
9243 gcc_assert (pos <= 3);
9244 template1 = get_template (curr_state->dfa_state, 3);
9245 pos += 3;
9247 if (!asm_p)
9248 /* Emit nops after the current insn. */
9249 for (i = 0; i < curr_state->after_nops_num; i++)
9251 rtx nop_pat = gen_nop ();
9252 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9253 pos--;
9254 gcc_assert (pos >= 0);
9255 if (pos % 3 == 0)
9257 /* We are at the start of a bundle: emit the template
9258 (it should be defined). */
9259 gcc_assert (template0 >= 0);
9260 ia64_add_bundle_selector_before (template0, nop);
9261 /* If we have a two-bundle window, we make one bundle
9262 rotation. Otherwise template0 will be undefined
9263 (negative value). */
9264 template0 = template1;
9265 template1 = -1;
9268 /* Move the position backward in the window. A group barrier has
9269 no slot. An asm insn takes a whole bundle. */
9270 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9271 && !unknown_for_bundling_p (insn))
9272 pos--;
9273 /* Long insn takes 2 slots. */
9274 if (ia64_safe_type (insn) == TYPE_L)
9275 pos--;
9276 gcc_assert (pos >= 0);
9277 if (pos % 3 == 0
9278 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9279 && !unknown_for_bundling_p (insn))
9281 /* The current insn is at the bundle start: emit the
9282 template. */
9283 gcc_assert (template0 >= 0);
9284 ia64_add_bundle_selector_before (template0, insn);
9285 b = PREV_INSN (insn);
9286 insn = b;
9287 /* See comment above in analogous place for emitting nops
9288 after the insn. */
9289 template0 = template1;
9290 template1 = -1;
9292 /* Emit nops before the current insn. */
9293 for (i = 0; i < curr_state->before_nops_num; i++)
9295 rtx nop_pat = gen_nop ();
9296 ia64_emit_insn_before (nop_pat, insn);
9297 rtx_insn *nop = PREV_INSN (insn);
9298 insn = nop;
9299 pos--;
9300 gcc_assert (pos >= 0);
9301 if (pos % 3 == 0)
9303 /* See comment above in analogous place for emitting nops
9304 after the insn. */
9305 gcc_assert (template0 >= 0);
9306 ia64_add_bundle_selector_before (template0, insn);
9307 b = PREV_INSN (insn);
9308 insn = b;
9309 template0 = template1;
9310 template1 = -1;
9315 if (flag_checking)
9317 /* Assert right calculation of middle_bundle_stops. */
9318 int num = best_state->middle_bundle_stops;
9319 bool start_bundle = true, end_bundle = false;
9321 for (insn = NEXT_INSN (prev_head_insn);
9322 insn && insn != tail;
9323 insn = NEXT_INSN (insn))
9325 if (!INSN_P (insn))
9326 continue;
9327 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9328 start_bundle = true;
9329 else
9331 rtx_insn *next_insn;
9333 for (next_insn = NEXT_INSN (insn);
9334 next_insn && next_insn != tail;
9335 next_insn = NEXT_INSN (next_insn))
9336 if (INSN_P (next_insn)
9337 && (ia64_safe_itanium_class (next_insn)
9338 != ITANIUM_CLASS_IGNORE
9339 || recog_memoized (next_insn)
9340 == CODE_FOR_bundle_selector)
9341 && GET_CODE (PATTERN (next_insn)) != USE
9342 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9343 break;
9345 end_bundle = next_insn == NULL_RTX
9346 || next_insn == tail
9347 || (INSN_P (next_insn)
9348 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9349 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9350 && !start_bundle && !end_bundle
9351 && next_insn
9352 && !unknown_for_bundling_p (next_insn))
9353 num--;
9355 start_bundle = false;
9359 gcc_assert (num == 0);
9362 free (index_to_bundle_states);
9363 finish_bundle_state_table ();
9364 bundling_p = 0;
9365 dfa_clean_insn_cache ();
9368 /* The following function is called at the end of scheduling BB or
9369 EBB. After reload, it inserts stop bits and does insn bundling. */
9371 static void
9372 ia64_sched_finish (FILE *dump, int sched_verbose)
9374 if (sched_verbose)
9375 fprintf (dump, "// Finishing schedule.\n");
9376 if (!reload_completed)
9377 return;
9378 if (reload_completed)
9380 final_emit_insn_group_barriers (dump);
9381 bundling (dump, sched_verbose, current_sched_info->prev_head,
9382 current_sched_info->next_tail);
9383 if (sched_verbose && dump)
9384 fprintf (dump, "// finishing %d-%d\n",
9385 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9386 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9388 return;
9392 /* The following function inserts stop bits in scheduled BB or EBB. */
9394 static void
9395 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9397 rtx_insn *insn;
9398 int need_barrier_p = 0;
9399 int seen_good_insn = 0;
9401 init_insn_group_barriers ();
9403 for (insn = NEXT_INSN (current_sched_info->prev_head);
9404 insn != current_sched_info->next_tail;
9405 insn = NEXT_INSN (insn))
9407 if (BARRIER_P (insn))
9409 rtx_insn *last = prev_active_insn (insn);
9411 if (! last)
9412 continue;
9413 if (JUMP_TABLE_DATA_P (last))
9414 last = prev_active_insn (last);
9415 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9416 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9418 init_insn_group_barriers ();
9419 seen_good_insn = 0;
9420 need_barrier_p = 0;
9422 else if (NONDEBUG_INSN_P (insn))
9424 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9426 init_insn_group_barriers ();
9427 seen_good_insn = 0;
9428 need_barrier_p = 0;
9430 else if (need_barrier_p || group_barrier_needed (insn)
9431 || (mflag_sched_stop_bits_after_every_cycle
9432 && GET_MODE (insn) == TImode
9433 && seen_good_insn))
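/* With -mearly-stop-bits the stop bit may be placed earlier than
   immediately before INSN: the search below moves it back to just
   before the most recent cycle-starting insn for which a stop was
   recorded in stops_p.  */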
9435 if (TARGET_EARLY_STOP_BITS)
9437 rtx_insn *last;
9439 for (last = insn;
9440 last != current_sched_info->prev_head;
9441 last = PREV_INSN (last))
9442 if (INSN_P (last) && GET_MODE (last) == TImode
9443 && stops_p [INSN_UID (last)])
9444 break;
9445 if (last == current_sched_info->prev_head)
9446 last = insn;
9447 last = prev_active_insn (last);
9448 if (last
9449 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9450 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9451 last);
9452 init_insn_group_barriers ();
9453 for (last = NEXT_INSN (last);
9454 last != insn;
9455 last = NEXT_INSN (last))
9456 if (INSN_P (last))
9458 group_barrier_needed (last);
9459 if (recog_memoized (last) >= 0
9460 && important_for_bundling_p (last))
9461 seen_good_insn = 1;
9464 else
9466 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9467 insn);
9468 init_insn_group_barriers ();
9469 seen_good_insn = 0;
9471 group_barrier_needed (insn);
9472 if (recog_memoized (insn) >= 0
9473 && important_for_bundling_p (insn))
9474 seen_good_insn = 1;
9476 else if (recog_memoized (insn) >= 0
9477 && important_for_bundling_p (insn))
9478 seen_good_insn = 1;
9479 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9486 /* The following function returns the number of ready insns considered
9487 in the first-cycle multipass DFA lookahead. */
9489 static int
9490 ia64_first_cycle_multipass_dfa_lookahead (void)
9492 return (reload_completed ? 6 : 4);
9495 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9497 static void
9498 ia64_init_dfa_pre_cycle_insn (void)
9500 if (temp_dfa_state == NULL)
9502 dfa_state_size = state_size ();
9503 temp_dfa_state = xmalloc (dfa_state_size);
9504 prev_cycle_state = xmalloc (dfa_state_size);
9506 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9507 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9508 recog_memoized (dfa_pre_cycle_insn);
9509 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9510 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9511 recog_memoized (dfa_stop_insn);
9514 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9515 used by the DFA insn scheduler. */
9517 static rtx
9518 ia64_dfa_pre_cycle_insn (void)
9520 return dfa_pre_cycle_insn;
9523 /* The following function returns TRUE if PRODUCER (of type ilog or
9524 ld) produces address for CONSUMER (of type st or stf). */
9527 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9529 rtx dest, reg, mem;
9531 gcc_assert (producer && consumer);
9532 dest = ia64_single_set (producer);
9533 gcc_assert (dest);
9534 reg = SET_DEST (dest);
9535 gcc_assert (reg);
9536 if (GET_CODE (reg) == SUBREG)
9537 reg = SUBREG_REG (reg);
9538 gcc_assert (GET_CODE (reg) == REG);
9540 dest = ia64_single_set (consumer);
9541 gcc_assert (dest);
9542 mem = SET_DEST (dest);
9543 gcc_assert (mem && GET_CODE (mem) == MEM);
9544 return reg_mentioned_p (reg, mem);
9547 /* The following function returns TRUE if PRODUCER (of type ilog or
9548 ld) produces address for CONSUMER (of type ld or fld). */
9551 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9553 rtx dest, src, reg, mem;
9555 gcc_assert (producer && consumer);
9556 dest = ia64_single_set (producer);
9557 gcc_assert (dest);
9558 reg = SET_DEST (dest);
9559 gcc_assert (reg);
9560 if (GET_CODE (reg) == SUBREG)
9561 reg = SUBREG_REG (reg);
9562 gcc_assert (GET_CODE (reg) == REG);
9564 src = ia64_single_set (consumer);
9565 gcc_assert (src);
9566 mem = SET_SRC (src);
9567 gcc_assert (mem);
9569 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9570 mem = XVECEXP (mem, 0, 0);
9571 else if (GET_CODE (mem) == IF_THEN_ELSE)
9572 /* ??? Is this bypass necessary for ld.c? */
9574 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9575 mem = XEXP (mem, 1);
9578 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9579 mem = XEXP (mem, 0);
9581 if (GET_CODE (mem) == UNSPEC)
9583 int c = XINT (mem, 1);
9585 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9586 || c == UNSPEC_LDSA);
9587 mem = XVECEXP (mem, 0, 0);
9590 /* Note that LO_SUM is used for GOT loads. */
9591 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9593 return reg_mentioned_p (reg, mem);
9596 /* The following function returns TRUE if INSN produces an address
9597 for a load/store insn. We will place such insns into an M slot
9598 because that decreases their latency. */
9601 ia64_produce_address_p (rtx insn)
9603 return insn->call;
9607 /* Emit pseudo-ops for the assembler to describe predicate relations.
9608 At present this assumes that we only consider predicate pairs to
9609 be mutex, and that the assembler can deduce proper values from
9610 straight-line code. */
9612 static void
9613 emit_predicate_relation_info (void)
9615 basic_block bb;
9617 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9619 int r;
9620 rtx_insn *head = BB_HEAD (bb);
9622 /* We only need such notes at code labels. */
9623 if (! LABEL_P (head))
9624 continue;
9625 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9626 head = NEXT_INSN (head);
9628 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9629 grabbing the entire block of predicate registers. */
9630 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9631 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9633 rtx p = gen_rtx_REG (BImode, r);
9634 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9635 if (head == BB_END (bb))
9636 BB_END (bb) = n;
9637 head = n;
9641 /* Look for conditional calls that do not return, and protect predicate
9642 relations around them. Otherwise the assembler will assume the call
9643 returns, and complain about uses of call-clobbered predicates after
9644 the call. */
9645 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9647 rtx_insn *insn = BB_HEAD (bb);
9649 while (1)
9651 if (CALL_P (insn)
9652 && GET_CODE (PATTERN (insn)) == COND_EXEC
9653 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9655 rtx_insn *b =
9656 emit_insn_before (gen_safe_across_calls_all (), insn);
9657 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9658 if (BB_HEAD (bb) == insn)
9659 BB_HEAD (bb) = b;
9660 if (BB_END (bb) == insn)
9661 BB_END (bb) = a;
9664 if (insn == BB_END (bb))
9665 break;
9666 insn = NEXT_INSN (insn);
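/* For reference, a hedged sketch of the directives these patterns expand
   to in the assembler output (predicate numbers vary per function):

       .pred.rel.mutex p6, p7
       .pred.safe_across_calls p1-p63          // around a noreturn call
       .pred.safe_across_calls p1-p5,p16-p63

   The exact template strings live with pred_rel_mutex and the
   safe_across_calls_* patterns in ia64.md, not here.  */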
9671 /* Perform machine dependent operations on the rtl chain INSNS. */
9673 static void
9674 ia64_reorg (void)
9676 /* We are freeing block_for_insn in the toplev to keep compatibility
9677 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9678 compute_bb_for_insn ();
9680 /* If optimizing, we'll have split before scheduling. */
9681 if (optimize == 0)
9682 split_all_insns ();
9684 if (optimize && flag_schedule_insns_after_reload
9685 && dbg_cnt (ia64_sched2))
9687 basic_block bb;
9688 timevar_push (TV_SCHED2);
9689 ia64_final_schedule = 1;
9691 /* We can't let modulo-sched prevent us from scheduling any bbs,
9692 since we need the final schedule to produce bundle information. */
9693 FOR_EACH_BB_FN (bb, cfun)
9694 bb->flags &= ~BB_DISABLE_SCHEDULE;
9696 initiate_bundle_states ();
9697 ia64_nop = make_insn_raw (gen_nop ());
9698 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9699 recog_memoized (ia64_nop);
9700 clocks_length = get_max_uid () + 1;
9701 stops_p = XCNEWVEC (char, clocks_length);
9703 if (ia64_tune == PROCESSOR_ITANIUM2)
9705 pos_1 = get_cpu_unit_code ("2_1");
9706 pos_2 = get_cpu_unit_code ("2_2");
9707 pos_3 = get_cpu_unit_code ("2_3");
9708 pos_4 = get_cpu_unit_code ("2_4");
9709 pos_5 = get_cpu_unit_code ("2_5");
9710 pos_6 = get_cpu_unit_code ("2_6");
9711 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9712 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9713 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9714 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9715 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9716 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9717 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9718 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9719 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9720 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9721 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9722 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9723 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9724 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9725 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9726 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9727 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9728 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9729 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9730 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9732 else
9734 pos_1 = get_cpu_unit_code ("1_1");
9735 pos_2 = get_cpu_unit_code ("1_2");
9736 pos_3 = get_cpu_unit_code ("1_3");
9737 pos_4 = get_cpu_unit_code ("1_4");
9738 pos_5 = get_cpu_unit_code ("1_5");
9739 pos_6 = get_cpu_unit_code ("1_6");
9740 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9741 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9742 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9743 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9744 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9745 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9746 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9747 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9748 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9749 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9750 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9751 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9752 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9753 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9754 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9755 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9756 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9757 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9758 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9759 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9762 if (flag_selective_scheduling2
9763 && !maybe_skip_selective_scheduling ())
9764 run_selective_scheduling ();
9765 else
9766 schedule_ebbs ();
9768 /* Redo alignment computation, as it might have gone wrong. */
9769 compute_alignments ();
9771 /* We cannot reuse this one because it has been corrupted by the
9772 evil glat. */
9773 finish_bundle_states ();
9774 free (stops_p);
9775 stops_p = NULL;
9776 emit_insn_group_barriers (dump_file);
9778 ia64_final_schedule = 0;
9779 timevar_pop (TV_SCHED2);
9781 else
9782 emit_all_insn_group_barriers (dump_file);
9784 df_analyze ();
9786 /* A call must not be the last instruction in a function, so that the
9787 return address stays within the function and unwinding works
9788 properly. Note that IA-64 differs from dwarf2 on this point. */
9789 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9791 rtx_insn *insn;
9792 int saw_stop = 0;
9794 insn = get_last_insn ();
9795 if (! INSN_P (insn))
9796 insn = prev_active_insn (insn);
9797 if (insn)
9799 /* Skip over insns that expand to nothing. */
9800 while (NONJUMP_INSN_P (insn)
9801 && get_attr_empty (insn) == EMPTY_YES)
9803 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9804 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9805 saw_stop = 1;
9806 insn = prev_active_insn (insn);
9808 if (CALL_P (insn))
9810 if (! saw_stop)
9811 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9812 emit_insn (gen_break_f ());
9813 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9818 emit_predicate_relation_info ();
9820 if (flag_var_tracking)
9822 timevar_push (TV_VAR_TRACKING);
9823 variable_tracking_main ();
9824 timevar_pop (TV_VAR_TRACKING);
9826 df_finish_pass (false);
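/* The scheduling work above is controlled by ordinary command-line flags;
   for example (the source file name is a placeholder):

     gcc -O2 -fselective-scheduling2 foo.c   // sel-sched instead of EBB sched
     gcc -O2 -fno-schedule-insns2 foo.c      // no sched2: barriers only

   With -fno-schedule-insns2, flag_schedule_insns_after_reload is clear and
   the else branch above simply calls emit_all_insn_group_barriers.  */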
9829 /* Return true if REGNO is used by the epilogue. */
9831 int
9832 ia64_epilogue_uses (int regno)
9834 switch (regno)
9836 case R_GR (1):
9837 /* With a call to a function in another module, we will write a new
9838 value to "gp". After returning from such a call, we need to make
9839 sure the function restores the original gp-value, even if the
9840 function itself does not use the gp anymore. */
9841 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9843 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9844 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9845 /* For functions defined with the syscall_linkage attribute, all
9846 input registers are marked as live at all function exits. This
9847 prevents the register allocator from using the input registers,
9848 which in turn makes it possible to restart a system call after
9849 an interrupt without having to save/restore the input registers.
9850 This also prevents kernel data from leaking to application code. */
9851 return lookup_attribute ("syscall_linkage",
9852 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9854 case R_BR (0):
9855 /* Conditional return patterns can't represent the use of `b0' as
9856 the return address, so we force the value live this way. */
9857 return 1;
9859 case AR_PFS_REGNUM:
9860 /* Likewise for ar.pfs, which is used by br.ret. */
9861 return 1;
9863 default:
9864 return 0;
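/* Sketch of a declaration carrying the syscall_linkage attribute checked
   above (the function name is hypothetical):

     extern long my_syscall_stub (long nr, long arg)
       __attribute__ ((syscall_linkage));

   For such functions all eight input registers are treated as live at
   every exit, as described in the IN_REG cases above.  */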
9868 /* Return true if REGNO is used by the frame unwinder. */
9870 int
9871 ia64_eh_uses (int regno)
9873 unsigned int r;
9875 if (! reload_completed)
9876 return 0;
9878 if (regno == 0)
9879 return 0;
9881 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9882 if (regno == current_frame_info.r[r]
9883 || regno == emitted_frame_related_regs[r])
9884 return 1;
9886 return 0;
9889 /* Return true if this goes in small data/bss. */
9891 /* ??? We could also support our own long data here. Generating movl/add/ld8
9892 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9893 code faster because there is one less load. This also includes incomplete
9894 types which can't go in sdata/sbss. */
9896 static bool
9897 ia64_in_small_data_p (const_tree exp)
9899 if (TARGET_NO_SDATA)
9900 return false;
9902 /* We want to merge strings, so we never consider them small data. */
9903 if (TREE_CODE (exp) == STRING_CST)
9904 return false;
9906 /* Functions are never small data. */
9907 if (TREE_CODE (exp) == FUNCTION_DECL)
9908 return false;
9910 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9912 const char *section = DECL_SECTION_NAME (exp);
9914 if (strcmp (section, ".sdata") == 0
9915 || strncmp (section, ".sdata.", 7) == 0
9916 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9917 || strcmp (section, ".sbss") == 0
9918 || strncmp (section, ".sbss.", 6) == 0
9919 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9920 return true;
9922 else
9924 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9926 /* If this is an incomplete type with size 0, then we can't put it
9927 in sdata because it might be too big when completed. */
9928 if (size > 0 && size <= ia64_section_threshold)
9929 return true;
9932 return false;
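/* Illustration of what the tests above accept (variable names are
   placeholders):

     static int tiny_counter;                                  // by size
     int pinned __attribute__ ((section (".sdata")));          // by section

   tiny_counter qualifies only while its size is at most
   ia64_section_threshold and sdata has not been disabled
   (TARGET_NO_SDATA); both then get the short gp-relative access
   sequence.  */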
9935 /* Output assembly directives for prologue regions. */
9937 /* True if the current basic block is the last one in the function. */
9939 static bool last_block;
9941 /* True if we need a copy_state command at the start of the next block. */
9943 static bool need_copy_state;
9945 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9946 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9947 #endif
9949 /* The function emits unwind directives for the start of an epilogue. */
9951 static void
9952 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9953 bool unwind, bool frame ATTRIBUTE_UNUSED)
9955 /* If this isn't the last block of the function, then we need to label the
9956 current state, and copy it back in at the start of the next block. */
9958 if (!last_block)
9960 if (unwind)
9961 fprintf (asm_out_file, "\t.label_state %d\n",
9962 ++cfun->machine->state_num);
9963 need_copy_state = true;
9966 if (unwind)
9967 fprintf (asm_out_file, "\t.restore sp\n");
9970 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9972 static void
9973 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9974 bool unwind, bool frame)
9976 rtx dest = SET_DEST (pat);
9977 rtx src = SET_SRC (pat);
9979 if (dest == stack_pointer_rtx)
9981 if (GET_CODE (src) == PLUS)
9983 rtx op0 = XEXP (src, 0);
9984 rtx op1 = XEXP (src, 1);
9986 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9988 if (INTVAL (op1) < 0)
9990 gcc_assert (!frame_pointer_needed);
9991 if (unwind)
9992 fprintf (asm_out_file,
9993 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
9994 -INTVAL (op1));
9996 else
9997 process_epilogue (asm_out_file, insn, unwind, frame);
9999 else
10001 gcc_assert (src == hard_frame_pointer_rtx);
10002 process_epilogue (asm_out_file, insn, unwind, frame);
10005 else if (dest == hard_frame_pointer_rtx)
10007 gcc_assert (src == stack_pointer_rtx);
10008 gcc_assert (frame_pointer_needed);
10010 if (unwind)
10011 fprintf (asm_out_file, "\t.vframe r%d\n",
10012 ia64_dbx_register_number (REGNO (dest)));
10014 else
10015 gcc_unreachable ();
10018 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10020 static void
10021 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10023 rtx dest = SET_DEST (pat);
10024 rtx src = SET_SRC (pat);
10025 int dest_regno = REGNO (dest);
10026 int src_regno;
10028 if (src == pc_rtx)
10030 /* Saving return address pointer. */
10031 if (unwind)
10032 fprintf (asm_out_file, "\t.save rp, r%d\n",
10033 ia64_dbx_register_number (dest_regno));
10034 return;
10037 src_regno = REGNO (src);
10039 switch (src_regno)
10041 case PR_REG (0):
10042 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10043 if (unwind)
10044 fprintf (asm_out_file, "\t.save pr, r%d\n",
10045 ia64_dbx_register_number (dest_regno));
10046 break;
10048 case AR_UNAT_REGNUM:
10049 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10050 if (unwind)
10051 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10052 ia64_dbx_register_number (dest_regno));
10053 break;
10055 case AR_LC_REGNUM:
10056 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10057 if (unwind)
10058 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10059 ia64_dbx_register_number (dest_regno));
10060 break;
10062 default:
10063 /* Everything else should indicate being stored to memory. */
10064 gcc_unreachable ();
10068 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10070 static void
10071 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10073 rtx dest = SET_DEST (pat);
10074 rtx src = SET_SRC (pat);
10075 int src_regno = REGNO (src);
10076 const char *saveop;
10077 HOST_WIDE_INT off;
10078 rtx base;
10080 gcc_assert (MEM_P (dest));
10081 if (GET_CODE (XEXP (dest, 0)) == REG)
10083 base = XEXP (dest, 0);
10084 off = 0;
10086 else
10088 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10089 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10090 base = XEXP (XEXP (dest, 0), 0);
10091 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10094 if (base == hard_frame_pointer_rtx)
10096 saveop = ".savepsp";
10097 off = - off;
10099 else
10101 gcc_assert (base == stack_pointer_rtx);
10102 saveop = ".savesp";
10105 src_regno = REGNO (src);
10106 switch (src_regno)
10108 case BR_REG (0):
10109 gcc_assert (!current_frame_info.r[reg_save_b0]);
10110 if (unwind)
10111 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10112 saveop, off);
10113 break;
10115 case PR_REG (0):
10116 gcc_assert (!current_frame_info.r[reg_save_pr]);
10117 if (unwind)
10118 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10119 saveop, off);
10120 break;
10122 case AR_LC_REGNUM:
10123 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10124 if (unwind)
10125 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10126 saveop, off);
10127 break;
10129 case AR_PFS_REGNUM:
10130 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10131 if (unwind)
10132 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10133 saveop, off);
10134 break;
10136 case AR_UNAT_REGNUM:
10137 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10138 if (unwind)
10139 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10140 saveop, off);
10141 break;
10143 case GR_REG (4):
10144 case GR_REG (5):
10145 case GR_REG (6):
10146 case GR_REG (7):
10147 if (unwind)
10148 fprintf (asm_out_file, "\t.save.g 0x%x\n",
10149 1 << (src_regno - GR_REG (4)));
10150 break;
10152 case BR_REG (1):
10153 case BR_REG (2):
10154 case BR_REG (3):
10155 case BR_REG (4):
10156 case BR_REG (5):
10157 if (unwind)
10158 fprintf (asm_out_file, "\t.save.b 0x%x\n",
10159 1 << (src_regno - BR_REG (1)));
10160 break;
10162 case FR_REG (2):
10163 case FR_REG (3):
10164 case FR_REG (4):
10165 case FR_REG (5):
10166 if (unwind)
10167 fprintf (asm_out_file, "\t.save.f 0x%x\n",
10168 1 << (src_regno - FR_REG (2)));
10169 break;
10171 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10172 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10173 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10174 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10175 if (unwind)
10176 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10177 1 << (src_regno - FR_REG (12)));
10178 break;
10180 default:
10181 /* ??? For some reason we mark other general registers, even those
10182 we can't represent in the unwind info. Ignore them. */
10183 break;
10187 /* This function looks at a single insn and emits any directives
10188 required to unwind this insn. */
10190 static void
10191 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10193 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10194 bool frame = dwarf2out_do_frame ();
10195 rtx note, pat;
10196 bool handled_one;
10198 if (!unwind && !frame)
10199 return;
10201 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10203 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10204 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10206 /* Restore unwind state from immediately before the epilogue. */
10207 if (need_copy_state)
10209 if (unwind)
10211 fprintf (asm_out_file, "\t.body\n");
10212 fprintf (asm_out_file, "\t.copy_state %d\n",
10213 cfun->machine->state_num);
10215 need_copy_state = false;
10219 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10220 return;
10222 /* Look for the ALLOC insn. */
10223 if (INSN_CODE (insn) == CODE_FOR_alloc)
10225 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10226 int dest_regno = REGNO (dest);
10228 /* If this is the final destination for ar.pfs, then this must
10229 be the alloc in the prologue. */
10230 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10232 if (unwind)
10233 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10234 ia64_dbx_register_number (dest_regno));
10236 else
10238 /* This must be an alloc before a sibcall. We must drop the
10239 old frame info. The easiest way to drop the old frame
10240 info is to ensure we had a ".restore sp" directive
10241 followed by a new prologue. If the procedure doesn't
10242 have a memory-stack frame, we'll issue a dummy ".restore
10243 sp" now. */
10244 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10245 /* If we haven't done process_epilogue () yet, do it now. */
10246 process_epilogue (asm_out_file, insn, unwind, frame);
10247 if (unwind)
10248 fprintf (asm_out_file, "\t.prologue\n");
10250 return;
10253 handled_one = false;
10254 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10255 switch (REG_NOTE_KIND (note))
10257 case REG_CFA_ADJUST_CFA:
10258 pat = XEXP (note, 0);
10259 if (pat == NULL)
10260 pat = PATTERN (insn);
10261 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10262 handled_one = true;
10263 break;
10265 case REG_CFA_OFFSET:
10266 pat = XEXP (note, 0);
10267 if (pat == NULL)
10268 pat = PATTERN (insn);
10269 process_cfa_offset (asm_out_file, pat, unwind);
10270 handled_one = true;
10271 break;
10273 case REG_CFA_REGISTER:
10274 pat = XEXP (note, 0);
10275 if (pat == NULL)
10276 pat = PATTERN (insn);
10277 process_cfa_register (asm_out_file, pat, unwind);
10278 handled_one = true;
10279 break;
10281 case REG_FRAME_RELATED_EXPR:
10282 case REG_CFA_DEF_CFA:
10283 case REG_CFA_EXPRESSION:
10284 case REG_CFA_RESTORE:
10285 case REG_CFA_SET_VDRAP:
10286 /* Not used in the ia64 port. */
10287 gcc_unreachable ();
10289 default:
10290 /* Not a frame-related note. */
10291 break;
10294 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10295 explicit action to take. No guessing required. */
10296 gcc_assert (handled_one);
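/* Roughly what the helpers above print for a small frame; register
   numbers and values are illustrative only:

       .save ar.pfs, r34
       .fframe 32
       .save rp, r35
       .body
       ...
       .label_state 1
       .restore sp

   Every directive string here appears literally in the fprintf calls
   above; the actual set and order follow the generated prologue.  */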
10299 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10301 static void
10302 ia64_asm_emit_except_personality (rtx personality)
10304 fputs ("\t.personality\t", asm_out_file);
10305 output_addr_const (asm_out_file, personality);
10306 fputc ('\n', asm_out_file);
10309 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10311 static void
10312 ia64_asm_init_sections (void)
10314 exception_section = get_unnamed_section (0, output_section_asm_op,
10315 "\t.handlerdata");
10318 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10320 static enum unwind_info_type
10321 ia64_debug_unwind_info (void)
10323 return UI_TARGET;
10326 enum ia64_builtins
10328 IA64_BUILTIN_BSP,
10329 IA64_BUILTIN_COPYSIGNQ,
10330 IA64_BUILTIN_FABSQ,
10331 IA64_BUILTIN_FLUSHRS,
10332 IA64_BUILTIN_INFQ,
10333 IA64_BUILTIN_HUGE_VALQ,
10334 IA64_BUILTIN_NANQ,
10335 IA64_BUILTIN_NANSQ,
10336 IA64_BUILTIN_max
10339 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10341 void
10342 ia64_init_builtins (void)
10344 tree fpreg_type;
10345 tree float80_type;
10346 tree decl;
10348 /* The __fpreg type. */
10349 fpreg_type = make_node (REAL_TYPE);
10350 TYPE_PRECISION (fpreg_type) = 82;
10351 layout_type (fpreg_type);
10352 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10354 /* The __float80 type. */
10355 if (float64x_type_node != NULL_TREE
10356 && TYPE_MODE (float64x_type_node) == XFmode)
10357 float80_type = float64x_type_node;
10358 else
10360 float80_type = make_node (REAL_TYPE);
10361 TYPE_PRECISION (float80_type) = 80;
10362 layout_type (float80_type);
10364 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10366 /* The __float128 type. */
10367 if (!TARGET_HPUX)
10369 tree ftype;
10370 tree const_string_type
10371 = build_pointer_type (build_qualified_type
10372 (char_type_node, TYPE_QUAL_CONST));
10374 (*lang_hooks.types.register_builtin_type) (float128_type_node,
10375 "__float128");
10377 /* TFmode support builtins. */
10378 ftype = build_function_type_list (float128_type_node, NULL_TREE);
10379 decl = add_builtin_function ("__builtin_infq", ftype,
10380 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10381 NULL, NULL_TREE);
10382 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10384 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10385 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10386 NULL, NULL_TREE);
10387 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10389 ftype = build_function_type_list (float128_type_node,
10390 const_string_type,
10391 NULL_TREE);
10392 decl = add_builtin_function ("__builtin_nanq", ftype,
10393 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10394 "nanq", NULL_TREE);
10395 TREE_READONLY (decl) = 1;
10396 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10398 decl = add_builtin_function ("__builtin_nansq", ftype,
10399 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10400 "nansq", NULL_TREE);
10401 TREE_READONLY (decl) = 1;
10402 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10404 ftype = build_function_type_list (float128_type_node,
10405 float128_type_node,
10406 NULL_TREE);
10407 decl = add_builtin_function ("__builtin_fabsq", ftype,
10408 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10409 "__fabstf2", NULL_TREE);
10410 TREE_READONLY (decl) = 1;
10411 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10413 ftype = build_function_type_list (float128_type_node,
10414 float128_type_node,
10415 float128_type_node,
10416 NULL_TREE);
10417 decl = add_builtin_function ("__builtin_copysignq", ftype,
10418 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10419 "__copysigntf3", NULL_TREE);
10420 TREE_READONLY (decl) = 1;
10421 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10423 else
10424 /* Under HPUX, this is a synonym for "long double". */
10425 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10426 "__float128");
10428 /* Fwrite on VMS is non-standard. */
10429 #if TARGET_ABI_OPEN_VMS
10430 vms_patch_builtins ();
10431 #endif
10433 #define def_builtin(name, type, code) \
10434 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10435 NULL, NULL_TREE)
10437 decl = def_builtin ("__builtin_ia64_bsp",
10438 build_function_type_list (ptr_type_node, NULL_TREE),
10439 IA64_BUILTIN_BSP);
10440 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10442 decl = def_builtin ("__builtin_ia64_flushrs",
10443 build_function_type_list (void_type_node, NULL_TREE),
10444 IA64_BUILTIN_FLUSHRS);
10445 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10447 #undef def_builtin
10449 if (TARGET_HPUX)
10451 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10452 set_user_assembler_name (decl, "_Isfinite");
10453 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10454 set_user_assembler_name (decl, "_Isfinitef");
10455 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10456 set_user_assembler_name (decl, "_Isfinitef128");
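/* The quad-float builtins registered above can be used directly from C;
   a small sketch:

     __float128 pinf = __builtin_infq ();
     __float128 mag  = __builtin_fabsq (pinf);
     __float128 qnan = __builtin_nanq ("");

   On HP-UX the TFmode builtins are not created and __float128 is just a
   synonym for long double, as handled in the else branch above.  */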
10460 static tree
10461 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10462 tree *args, bool ignore ATTRIBUTE_UNUSED)
10464 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10466 enum ia64_builtins fn_code = (enum ia64_builtins)
10467 DECL_FUNCTION_CODE (fndecl);
10468 switch (fn_code)
10470 case IA64_BUILTIN_NANQ:
10471 case IA64_BUILTIN_NANSQ:
10473 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10474 const char *str = c_getstr (*args);
10475 int quiet = fn_code == IA64_BUILTIN_NANQ;
10476 REAL_VALUE_TYPE real;
10478 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10479 return build_real (type, real);
10480 return NULL_TREE;
10483 default:
10484 break;
10488 #ifdef SUBTARGET_FOLD_BUILTIN
10489 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10490 #endif
10492 return NULL_TREE;
10495 rtx
10496 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10497 machine_mode mode ATTRIBUTE_UNUSED,
10498 int ignore ATTRIBUTE_UNUSED)
10500 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10501 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10503 switch (fcode)
10505 case IA64_BUILTIN_BSP:
10506 if (! target || ! register_operand (target, DImode))
10507 target = gen_reg_rtx (DImode);
10508 emit_insn (gen_bsp_value (target));
10509 #ifdef POINTERS_EXTEND_UNSIGNED
10510 target = convert_memory_address (ptr_mode, target);
10511 #endif
10512 return target;
10514 case IA64_BUILTIN_FLUSHRS:
10515 emit_insn (gen_flushrs ());
10516 return const0_rtx;
10518 case IA64_BUILTIN_INFQ:
10519 case IA64_BUILTIN_HUGE_VALQ:
10521 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10522 REAL_VALUE_TYPE inf;
10523 rtx tmp;
10525 real_inf (&inf);
10526 tmp = const_double_from_real_value (inf, target_mode);
10528 tmp = validize_mem (force_const_mem (target_mode, tmp));
10530 if (target == 0)
10531 target = gen_reg_rtx (target_mode);
10533 emit_move_insn (target, tmp);
10534 return target;
10537 case IA64_BUILTIN_NANQ:
10538 case IA64_BUILTIN_NANSQ:
10539 case IA64_BUILTIN_FABSQ:
10540 case IA64_BUILTIN_COPYSIGNQ:
10541 return expand_call (exp, target, ignore);
10543 default:
10544 gcc_unreachable ();
10547 return NULL_RTX;
10550 /* Return the ia64 builtin for CODE. */
10552 static tree
10553 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10555 if (code >= IA64_BUILTIN_max)
10556 return error_mark_node;
10558 return ia64_builtins[code];
10561 /* On HP-UX IA64, aggregate parameters are passed in the most
10562 significant bits of the stack slot. */
10564 enum direction
10565 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10567 /* Exception to normal case for structures/unions/etc. */
10569 if (type && AGGREGATE_TYPE_P (type)
10570 && int_size_in_bytes (type) < UNITS_PER_WORD)
10571 return upward;
10573 /* Fall back to the default. */
10574 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
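/* Worked example of the rule above, HP-UX only: a 2-byte aggregate such as

     struct tiny { char a, b; };   // hypothetical type

   is smaller than UNITS_PER_WORD and therefore padded upward, i.e. it
   travels in the most significant bytes of its stack slot; everything
   else keeps the default padding.  */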
10577 /* Emit text to declare externally defined variables and functions, because
10578 the Intel assembler does not support undefined externals. */
10580 void
10581 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10583 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10584 set in order to avoid putting out names that are never really
10585 used. */
10586 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10588 /* maybe_assemble_visibility will return 1 if the assembler
10589 visibility directive is output. */
10590 int need_visibility = ((*targetm.binds_local_p) (decl)
10591 && maybe_assemble_visibility (decl));
10593 /* GNU as does not need anything here, but the HP linker does
10594 need something for external functions. */
10595 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10596 && TREE_CODE (decl) == FUNCTION_DECL)
10597 (*targetm.asm_out.globalize_decl_name) (file, decl);
10598 else if (need_visibility && !TARGET_GNU_AS)
10599 (*targetm.asm_out.globalize_label) (file, name);
10603 /* Set SImode div/mod functions; init_integral_libfuncs only initializes
10604 modes of word_mode and larger. Rename the TFmode libfuncs using the
10605 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10606 backward compatibility. */
10608 static void
10609 ia64_init_libfuncs (void)
10611 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10612 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10613 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10614 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10616 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10617 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10618 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10619 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10620 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10622 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10623 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10624 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10625 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10626 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10627 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10629 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10630 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10631 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10632 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10633 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10635 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10636 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10637 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10638 /* HP-UX 11.23 libc does not have a function for unsigned
10639 SImode-to-TFmode conversion. */
10640 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
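/* Effect of the renaming above, sketched with a placeholder function:

     __float128 quad_div (__float128 a, __float128 b) { return a / b; }

   now expands to a call to _U_Qfdiv instead of the default libgcc
   soft-fp entry point, matching the HPUX naming convention.  */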
10643 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10645 static void
10646 ia64_hpux_init_libfuncs (void)
10648 ia64_init_libfuncs ();
10650 /* The HP SI millicode division and mod functions expect DI arguments.
10651 By turning them off completely we avoid using both libgcc and the
10652 non-standard millicode routines and use the HP DI millicode routines
10653 instead. */
10655 set_optab_libfunc (sdiv_optab, SImode, 0);
10656 set_optab_libfunc (udiv_optab, SImode, 0);
10657 set_optab_libfunc (smod_optab, SImode, 0);
10658 set_optab_libfunc (umod_optab, SImode, 0);
10660 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10661 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10662 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10663 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10665 /* HP-UX libc has TF min/max/abs routines in it. */
10666 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10667 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10668 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10670 /* ia64_expand_compare uses this. */
10671 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10673 /* These should never be used. */
10674 set_optab_libfunc (eq_optab, TFmode, 0);
10675 set_optab_libfunc (ne_optab, TFmode, 0);
10676 set_optab_libfunc (gt_optab, TFmode, 0);
10677 set_optab_libfunc (ge_optab, TFmode, 0);
10678 set_optab_libfunc (lt_optab, TFmode, 0);
10679 set_optab_libfunc (le_optab, TFmode, 0);
10682 /* Rename the division and modulus functions in VMS. */
10684 static void
10685 ia64_vms_init_libfuncs (void)
10687 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10688 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10689 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10690 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10691 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10692 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10693 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10694 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10695 #ifdef MEM_LIBFUNCS_INIT
10696 MEM_LIBFUNCS_INIT;
10697 #endif
10700 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10701 the HPUX conventions. */
10703 static void
10704 ia64_sysv4_init_libfuncs (void)
10706 ia64_init_libfuncs ();
10708 /* These functions are not part of the HPUX TFmode interface. We
10709 use them instead of _U_Qfcmp, which doesn't work the way we
10710 expect. */
10711 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10712 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10713 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10714 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10715 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10716 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10718 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10719 glibc doesn't have them. */
10722 /* Use soft-fp. */
10724 static void
10725 ia64_soft_fp_init_libfuncs (void)
10729 static bool
10730 ia64_vms_valid_pointer_mode (machine_mode mode)
10732 return (mode == SImode || mode == DImode);
10735 /* For HPUX, it is illegal to have relocations in shared segments. */
10737 static int
10738 ia64_hpux_reloc_rw_mask (void)
10740 return 3;
10743 /* For others, relax this so that relocations to local data go in
10744 read-only segments, but we still cannot allow global relocations
10745 in read-only segments. */
10747 static int
10748 ia64_reloc_rw_mask (void)
10750 return flag_pic ? 3 : 2;
10753 /* Return the section to use for X. The only special thing we do here
10754 is to honor small data. */
10756 static section *
10757 ia64_select_rtx_section (machine_mode mode, rtx x,
10758 unsigned HOST_WIDE_INT align)
10760 if (GET_MODE_SIZE (mode) > 0
10761 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10762 && !TARGET_NO_SDATA)
10763 return sdata_section;
10764 else
10765 return default_elf_select_rtx_section (mode, x, align);
10768 static unsigned int
10769 ia64_section_type_flags (tree decl, const char *name, int reloc)
10771 unsigned int flags = 0;
10773 if (strcmp (name, ".sdata") == 0
10774 || strncmp (name, ".sdata.", 7) == 0
10775 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10776 || strncmp (name, ".sdata2.", 8) == 0
10777 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10778 || strcmp (name, ".sbss") == 0
10779 || strncmp (name, ".sbss.", 6) == 0
10780 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10781 flags = SECTION_SMALL;
10783 flags |= default_section_type_flags (decl, name, reloc);
10784 return flags;
10787 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10788 structure type and that the address of that type should be passed
10789 in out0, rather than in r8. */
10791 static bool
10792 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10794 tree ret_type = TREE_TYPE (fntype);
10796 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10797 as the structure return address parameter, if the return value
10798 type has a non-trivial copy constructor or destructor. It is not
10799 clear if this same convention should be used for other
10800 programming languages. Until G++ 3.4, we incorrectly used r8 for
10801 these return values. */
10802 return (abi_version_at_least (2)
10803 && ret_type
10804 && TYPE_MODE (ret_type) == BLKmode
10805 && TREE_ADDRESSABLE (ret_type)
10806 && lang_GNU_CXX ());
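/* A hedged C++ illustration of the test above (type and function names
   are made up):

     struct handle { ~handle (); int slots[4]; };
     handle make_handle ();

   handle has a non-trivial destructor, so the return value is BLKmode and
   TREE_ADDRESSABLE; callers then pass the return-slot address in out0.
   Trivially copyable aggregates keep using r8, see ia64_struct_value_rtx
   below.  */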
10809 /* Output the assembler code for a thunk function. THUNK_DECL is the
10810 declaration for the thunk function itself, FUNCTION is the decl for
10811 the target function. DELTA is an immediate constant offset to be
10812 added to THIS. If VCALL_OFFSET is nonzero, the word at
10813 *(*this + vcall_offset) should be added to THIS. */
10815 static void
10816 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10817 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10818 tree function)
10820 rtx this_rtx, funexp;
10821 rtx_insn *insn;
10822 unsigned int this_parmno;
10823 unsigned int this_regno;
10824 rtx delta_rtx;
10826 reload_completed = 1;
10827 epilogue_completed = 1;
10829 /* Set things up as ia64_expand_prologue might. */
10830 last_scratch_gr_reg = 15;
10832 memset (&current_frame_info, 0, sizeof (current_frame_info));
10833 current_frame_info.spill_cfa_off = -16;
10834 current_frame_info.n_input_regs = 1;
10835 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10837 /* Mark the end of the (empty) prologue. */
10838 emit_note (NOTE_INSN_PROLOGUE_END);
10840 /* Figure out whether "this" will be the first parameter (the
10841 typical case) or the second parameter (as happens when the
10842 virtual function returns certain class objects). */
10843 this_parmno
10844 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10845 ? 1 : 0);
10846 this_regno = IN_REG (this_parmno);
10847 if (!TARGET_REG_NAMES)
10848 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10850 this_rtx = gen_rtx_REG (Pmode, this_regno);
10852 /* Apply the constant offset, if required. */
10853 delta_rtx = GEN_INT (delta);
10854 if (TARGET_ILP32)
10856 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10857 REG_POINTER (tmp) = 1;
10858 if (delta && satisfies_constraint_I (delta_rtx))
10860 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10861 delta = 0;
10863 else
10864 emit_insn (gen_ptr_extend (this_rtx, tmp));
10866 if (delta)
10868 if (!satisfies_constraint_I (delta_rtx))
10870 rtx tmp = gen_rtx_REG (Pmode, 2);
10871 emit_move_insn (tmp, delta_rtx);
10872 delta_rtx = tmp;
10874 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10877 /* Apply the offset from the vtable, if required. */
10878 if (vcall_offset)
10880 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10881 rtx tmp = gen_rtx_REG (Pmode, 2);
10883 if (TARGET_ILP32)
10885 rtx t = gen_rtx_REG (ptr_mode, 2);
10886 REG_POINTER (t) = 1;
10887 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10888 if (satisfies_constraint_I (vcall_offset_rtx))
10890 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10891 vcall_offset = 0;
10893 else
10894 emit_insn (gen_ptr_extend (tmp, t));
10896 else
10897 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10899 if (vcall_offset)
10901 if (!satisfies_constraint_J (vcall_offset_rtx))
10903 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10904 emit_move_insn (tmp2, vcall_offset_rtx);
10905 vcall_offset_rtx = tmp2;
10907 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10910 if (TARGET_ILP32)
10911 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10912 else
10913 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10915 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10918 /* Generate a tail call to the target function. */
10919 if (! TREE_USED (function))
10921 assemble_external (function);
10922 TREE_USED (function) = 1;
10924 funexp = XEXP (DECL_RTL (function), 0);
10925 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10926 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10927 insn = get_last_insn ();
10928 SIBLING_CALL_P (insn) = 1;
10930 /* Code generation for calls relies on splitting. */
10931 reload_completed = 1;
10932 epilogue_completed = 1;
10933 try_split (PATTERN (insn), insn, 0);
10935 emit_barrier ();
10937 /* Run just enough of rest_of_compilation to get the insns emitted.
10938 There's not really enough bulk here to make other passes such as
10939 instruction scheduling worth while. Note that use_thunk calls
10940 assemble_start_function and assemble_end_function. */
10942 emit_all_insn_group_barriers (NULL);
10943 insn = get_insns ();
10944 shorten_branches (insn);
10945 final_start_function (insn, file, 1);
10946 final (insn, file, 1);
10947 final_end_function ();
10949 reload_completed = 0;
10950 epilogue_completed = 0;
10953 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10955 static rtx
10956 ia64_struct_value_rtx (tree fntype,
10957 int incoming ATTRIBUTE_UNUSED)
10959 if (TARGET_ABI_OPEN_VMS ||
10960 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10961 return NULL_RTX;
10962 return gen_rtx_REG (Pmode, GR_REG (8));
10965 static bool
10966 ia64_scalar_mode_supported_p (machine_mode mode)
10968 switch (mode)
10970 case QImode:
10971 case HImode:
10972 case SImode:
10973 case DImode:
10974 case TImode:
10975 return true;
10977 case SFmode:
10978 case DFmode:
10979 case XFmode:
10980 case RFmode:
10981 return true;
10983 case TFmode:
10984 return true;
10986 default:
10987 return false;
10991 static bool
10992 ia64_vector_mode_supported_p (machine_mode mode)
10994 switch (mode)
10996 case V8QImode:
10997 case V4HImode:
10998 case V2SImode:
10999 return true;
11001 case V2SFmode:
11002 return true;
11004 default:
11005 return false;
11009 /* Implement the FUNCTION_PROFILER macro. */
11011 void
11012 ia64_output_function_profiler (FILE *file, int labelno)
11014 bool indirect_call;
11016 /* If the function needs a static chain and the static chain
11017 register is r15, we use an indirect call so as to bypass
11018 the PLT stub in case the executable is dynamically linked,
11019 because the stub clobbers r15 as per 5.3.6 of the psABI.
11020 We don't need to do that in non-canonical PIC mode. */
11022 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11024 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11025 indirect_call = true;
11027 else
11028 indirect_call = false;
11030 if (TARGET_GNU_AS)
11031 fputs ("\t.prologue 4, r40\n", file);
11032 else
11033 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11034 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11036 if (NO_PROFILE_COUNTERS)
11037 fputs ("\tmov out3 = r0\n", file);
11038 else
11040 char buf[20];
11041 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11043 if (TARGET_AUTO_PIC)
11044 fputs ("\tmovl out3 = @gprel(", file);
11045 else
11046 fputs ("\taddl out3 = @ltoff(", file);
11047 assemble_name (file, buf);
11048 if (TARGET_AUTO_PIC)
11049 fputs (")\n", file);
11050 else
11051 fputs ("), r1\n", file);
11054 if (indirect_call)
11055 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11056 fputs ("\t;;\n", file);
11058 fputs ("\t.save rp, r42\n", file);
11059 fputs ("\tmov out2 = b0\n", file);
11060 if (indirect_call)
11061 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11062 fputs ("\t.body\n", file);
11063 fputs ("\tmov out1 = r1\n", file);
11064 if (indirect_call)
11066 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11067 fputs ("\tmov b6 = r16\n", file);
11068 fputs ("\tld8 r1 = [r14]\n", file);
11069 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11071 else
11072 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11075 static GTY(()) rtx mcount_func_rtx;
11076 static rtx
11077 gen_mcount_func_rtx (void)
11079 if (!mcount_func_rtx)
11080 mcount_func_rtx = init_one_libfunc ("_mcount");
11081 return mcount_func_rtx;
11084 void
11085 ia64_profile_hook (int labelno)
11087 rtx label, ip;
11089 if (NO_PROFILE_COUNTERS)
11090 label = const0_rtx;
11091 else
11093 char buf[30];
11094 const char *label_name;
11095 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11096 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11097 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11098 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11100 ip = gen_reg_rtx (Pmode);
11101 emit_insn (gen_ip_value (ip));
11102 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11103 VOIDmode, 3,
11104 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11105 ip, Pmode,
11106 label, Pmode);
11109 /* Return the mangling of TYPE if it is an extended fundamental type. */
11111 static const char *
11112 ia64_mangle_type (const_tree type)
11114 type = TYPE_MAIN_VARIANT (type);
11116 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11117 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11118 return NULL;
11120 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11121 mangled as "e". */
11122 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11123 return "g";
11124 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11125 an extended mangling. Elsewhere, "e" is available since long
11126 double is 80 bits. */
11127 if (TYPE_MODE (type) == XFmode)
11128 return TARGET_HPUX ? "u9__float80" : "e";
11129 if (TYPE_MODE (type) == RFmode)
11130 return "u7__fpreg";
11131 return NULL;
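/* Mangling sketch for the returns above (the function name is made up):

     void flt80_sink (__float80);

   mangles to _Z10flt80_sinke on GNU/Linux, where "e" is available because
   long double is the 80-bit type, and to _Z10flt80_sinku9__float80 on
   HP-UX; a __fpreg parameter mangles as u7__fpreg everywhere.  */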
11134 /* Return the diagnostic message string if conversion from FROMTYPE to
11135 TOTYPE is not allowed, NULL otherwise. */
11136 static const char *
11137 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11139 /* Reject nontrivial conversion to or from __fpreg. */
11140 if (TYPE_MODE (fromtype) == RFmode
11141 && TYPE_MODE (totype) != RFmode
11142 && TYPE_MODE (totype) != VOIDmode)
11143 return N_("invalid conversion from %<__fpreg%>");
11144 if (TYPE_MODE (totype) == RFmode
11145 && TYPE_MODE (fromtype) != RFmode)
11146 return N_("invalid conversion to %<__fpreg%>");
11147 return NULL;
11150 /* Return the diagnostic message string if the unary operation OP is
11151 not permitted on TYPE, NULL otherwise. */
11152 static const char *
11153 ia64_invalid_unary_op (int op, const_tree type)
11155 /* Reject operations on __fpreg other than unary + or &. */
11156 if (TYPE_MODE (type) == RFmode
11157 && op != CONVERT_EXPR
11158 && op != ADDR_EXPR)
11159 return N_("invalid operation on %<__fpreg%>");
11160 return NULL;
11163 /* Return the diagnostic message string if the binary operation OP is
11164 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11165 static const char *
11166 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11168 /* Reject operations on __fpreg. */
11169 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11170 return N_("invalid operation on %<__fpreg%>");
11171 return NULL;
11174 /* HP-UX version_id attribute.
11175 For object foo, if the version_id is set to 1234, put out an alias
11176 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
11177 other than an alias statement because it is an illegal symbol name. */
11179 static tree
11180 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11181 tree name ATTRIBUTE_UNUSED,
11182 tree args,
11183 int flags ATTRIBUTE_UNUSED,
11184 bool *no_add_attrs)
11186 tree arg = TREE_VALUE (args);
11188 if (TREE_CODE (arg) != STRING_CST)
11190 error ("version attribute is not a string");
11191 *no_add_attrs = true;
11192 return NULL_TREE;
11194 return NULL_TREE;
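/* Usage sketch for the version_id attribute handled above (the symbol
   name and version string are placeholders):

     extern int legacy_api (void) __attribute__ ((version_id ("1234")));

   which, per the comment before this handler, is emitted as an
   '.alias legacy_api "legacy_api{1234}"' directive.  */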
11197 /* Target hook for c_mode_for_suffix. */
11199 static machine_mode
11200 ia64_c_mode_for_suffix (char suffix)
11202 if (suffix == 'q')
11203 return TFmode;
11204 if (suffix == 'w')
11205 return XFmode;
11207 return VOIDmode;
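/* The suffix mapping above lets quad and extended constants be written
   directly in C (variable names are arbitrary):

     __float128 q = 1.5q;   // TFmode constant
     __float80  w = 1.5w;   // XFmode constant

   On HP-UX __float128 is long double, so the q suffix still yields a
   TFmode value there.  */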
11210 static GTY(()) rtx ia64_dconst_0_5_rtx;
11212 rtx
11213 ia64_dconst_0_5 (void)
11215 if (! ia64_dconst_0_5_rtx)
11217 REAL_VALUE_TYPE rv;
11218 real_from_string (&rv, "0.5");
11219 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11221 return ia64_dconst_0_5_rtx;
11224 static GTY(()) rtx ia64_dconst_0_375_rtx;
11226 rtx
11227 ia64_dconst_0_375 (void)
11229 if (! ia64_dconst_0_375_rtx)
11231 REAL_VALUE_TYPE rv;
11232 real_from_string (&rv, "0.375");
11233 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11235 return ia64_dconst_0_375_rtx;
11238 static machine_mode
11239 ia64_get_reg_raw_mode (int regno)
11241 if (FR_REGNO_P (regno))
11242 return XFmode;
11243 return default_get_reg_raw_mode(regno);
11246 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11247 anymore. */
11249 bool
11250 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11252 return TARGET_HPUX && mode == TFmode;
11255 /* Always default to .text section until HP-UX linker is fixed. */
11257 ATTRIBUTE_UNUSED static section *
11258 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11259 enum node_frequency freq ATTRIBUTE_UNUSED,
11260 bool startup ATTRIBUTE_UNUSED,
11261 bool exit ATTRIBUTE_UNUSED)
11263 return NULL;
11266 /* Construct (set target (vec_select op0 (parallel perm))) and
11267 return true if that's a valid instruction in the active ISA. */
11269 static bool
11270 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11272 rtx rperm[MAX_VECT_LEN], x;
11273 unsigned i;
11275 for (i = 0; i < nelt; ++i)
11276 rperm[i] = GEN_INT (perm[i]);
11278 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11279 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11280 x = gen_rtx_SET (target, x);
11282 rtx_insn *insn = emit_insn (x);
11283 if (recog_memoized (insn) < 0)
11285 remove_insn (insn);
11286 return false;
11288 return true;
11291 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11293 static bool
11294 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11295 const unsigned char *perm, unsigned nelt)
11297 machine_mode v2mode;
11298 rtx x;
11300 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11301 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11302 return expand_vselect (target, x, perm, nelt);
11305 /* Try to expand a no-op permutation. */
11307 static bool
11308 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11310 unsigned i, nelt = d->nelt;
11312 for (i = 0; i < nelt; ++i)
11313 if (d->perm[i] != i)
11314 return false;
11316 if (!d->testing_p)
11317 emit_move_insn (d->target, d->op0);
11319 return true;
11322 /* Try to expand D via a shrp instruction. */
11324 static bool
11325 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11327 unsigned i, nelt = d->nelt, shift, mask;
11328 rtx tmp, hi, lo;
11330 /* ??? Don't force V2SFmode into the integer registers. */
11331 if (d->vmode == V2SFmode)
11332 return false;
11334 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11336 shift = d->perm[0];
11337 if (BYTES_BIG_ENDIAN && shift > nelt)
11338 return false;
11340 for (i = 1; i < nelt; ++i)
11341 if (d->perm[i] != ((shift + i) & mask))
11342 return false;
11344 if (d->testing_p)
11345 return true;
11347 hi = shift < nelt ? d->op1 : d->op0;
11348 lo = shift < nelt ? d->op0 : d->op1;
11350 shift %= nelt;
11352 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11354 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11355 gcc_assert (IN_RANGE (shift, 1, 63));
11357 /* Recall that big-endian elements are numbered starting at the top of
11358 the register. Ideally we'd have a shift-left-pair. But since we
11359 don't, convert to a shift the other direction. */
11360 if (BYTES_BIG_ENDIAN)
11361 shift = 64 - shift;
11363 tmp = gen_reg_rtx (DImode);
11364 hi = gen_lowpart (DImode, hi);
11365 lo = gen_lowpart (DImode, lo);
11366 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11368 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11369 return true;
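/* Worked example for the shrp path: a one-operand V4HImode permutation
   with perm = { 1, 2, 3, 0 } gives shift = 1, each perm[i] equals
   (shift + i) & 3, and on a little-endian target the funnel-shift count
   becomes 1 * 16 = 16 bits, so the whole permutation is one

     shrp rT = rHI, rLO, 16

   instruction (register names are placeholders).  */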
11372 /* Try to instantiate D in a single instruction. */
11374 static bool
11375 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11377 unsigned i, nelt = d->nelt;
11378 unsigned char perm2[MAX_VECT_LEN];
11380 /* Try single-operand selections. */
11381 if (d->one_operand_p)
11383 if (expand_vec_perm_identity (d))
11384 return true;
11385 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11386 return true;
11389 /* Try two operand selections. */
11390 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11391 return true;
11393 /* Recognize interleave style patterns with reversed operands. */
11394 if (!d->one_operand_p)
11396 for (i = 0; i < nelt; ++i)
11398 unsigned e = d->perm[i];
11399 if (e >= nelt)
11400 e -= nelt;
11401 else
11402 e += nelt;
11403 perm2[i] = e;
11406 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11407 return true;
11410 if (expand_vec_perm_shrp (d))
11411 return true;
11413 /* ??? Look for deposit-like permutations where most of the result
11414 comes from one vector unchanged and the rest comes from a
11415 sequential hunk of the other vector. */
11417 return false;
11420 /* Pattern match broadcast permutations. */
11422 static bool
11423 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11425 unsigned i, elt, nelt = d->nelt;
11426 unsigned char perm2[2];
11427 rtx temp;
11428 bool ok;
11430 if (!d->one_operand_p)
11431 return false;
11433 elt = d->perm[0];
11434 for (i = 1; i < nelt; ++i)
11435 if (d->perm[i] != elt)
11436 return false;
11438 switch (d->vmode)
11440 case V2SImode:
11441 case V2SFmode:
11442 /* Implementable by interleave. */
11443 perm2[0] = elt;
11444 perm2[1] = elt + 2;
11445 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11446 gcc_assert (ok);
11447 break;
11449 case V8QImode:
11450 /* Implementable by extract + broadcast. */
11451 if (BYTES_BIG_ENDIAN)
11452 elt = 7 - elt;
11453 elt *= BITS_PER_UNIT;
11454 temp = gen_reg_rtx (DImode);
11455 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11456 GEN_INT (8), GEN_INT (elt)));
11457 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11458 break;
11460 case V4HImode:
11461 /* Should have been matched directly by vec_select. */
11462 default:
11463 gcc_unreachable ();
11466 return true;
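/* Broadcast sketch for the V8QImode case above: with
   perm = { 5, 5, 5, 5, 5, 5, 5, 5 } the code extracts byte 5 of op0
   (bit position 5 * 8 on little-endian) into a scalar and replicates it
   via mux1_brcst_qi, i.e. the "mux1 rD = rS, @brcst" form of mux1
   (register names are placeholders).  */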
11469 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11470 two vector permutation into a single vector permutation by using
11471 an interleave operation to merge the vectors. */
11473 static bool
11474 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11476 struct expand_vec_perm_d dremap, dfinal;
11477 unsigned char remap[2 * MAX_VECT_LEN];
11478 unsigned contents, i, nelt, nelt2;
11479 unsigned h0, h1, h2, h3;
11480 rtx_insn *seq;
11481 bool ok;
11483 if (d->one_operand_p)
11484 return false;
11486 nelt = d->nelt;
11487 nelt2 = nelt / 2;
11489 /* Examine from whence the elements come. */
11490 contents = 0;
11491 for (i = 0; i < nelt; ++i)
11492 contents |= 1u << d->perm[i];
11494 memset (remap, 0xff, sizeof (remap));
11495 dremap = *d;
11497 h0 = (1u << nelt2) - 1;
11498 h1 = h0 << nelt2;
11499 h2 = h0 << nelt;
11500 h3 = h0 << (nelt + nelt2);
11502 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11504 for (i = 0; i < nelt; ++i)
11506 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11507 remap[which] = i;
11508 dremap.perm[i] = which;
11511 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11513 for (i = 0; i < nelt; ++i)
11515 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11516 remap[which] = i;
11517 dremap.perm[i] = which;
11520 else if ((contents & 0x5555) == contents) /* mix even elements */
11522 for (i = 0; i < nelt; ++i)
11524 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11525 remap[which] = i;
11526 dremap.perm[i] = which;
11529 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11531 for (i = 0; i < nelt; ++i)
11533 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11534 remap[which] = i;
11535 dremap.perm[i] = which;
11538 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11540 unsigned shift = ctz_hwi (contents);
11541 for (i = 0; i < nelt; ++i)
11543 unsigned which = (i + shift) & (2 * nelt - 1);
11544 remap[which] = i;
11545 dremap.perm[i] = which;
11548 else
11549 return false;
11551 /* Use the remapping array set up above to move the elements from their
11552 swizzled locations into their final destinations. */
11553 dfinal = *d;
11554 for (i = 0; i < nelt; ++i)
11556 unsigned e = remap[d->perm[i]];
11557 gcc_assert (e < nelt);
11558 dfinal.perm[i] = e;
11560 if (d->testing_p)
11561 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11562 else
11563 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11564 dfinal.op1 = dfinal.op0;
11565 dfinal.one_operand_p = true;
11566 dremap.target = dfinal.op0;
11568 /* Test if the final remap can be done with a single insn. For V4HImode
11569 this *will* succeed. For V8QImode or V2SImode it may not. */
11570 start_sequence ();
11571 ok = expand_vec_perm_1 (&dfinal);
11572 seq = get_insns ();
11573 end_sequence ();
11574 if (!ok)
11575 return false;
11576 if (d->testing_p)
11577 return true;
11579 ok = expand_vec_perm_1 (&dremap);
11580 gcc_assert (ok);
11582 emit_insn (seq);
11583 return true;
11586 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11587 constant permutation via two mux2 and a merge. */
11589 static bool
11590 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11592 unsigned char perm2[4];
11593 rtx rmask[4];
11594 unsigned i;
11595 rtx t0, t1, mask, x;
11596 bool ok;
11598 if (d->vmode != V4HImode || d->one_operand_p)
11599 return false;
11600 if (d->testing_p)
11601 return true;
11603 for (i = 0; i < 4; ++i)
11605 perm2[i] = d->perm[i] & 3;
11606 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11608 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11609 mask = force_reg (V4HImode, mask);
  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }
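
  /* WHICH now has bit 0 set if any selector element indexes OP0 (element
     < NELT) and bit 1 set if any indexes OP1; e.g. the V4HImode selector
     { 0, 4, 1, 5 } leaves WHICH == 3.  */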
  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;
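
  /* E.g. a V4HImode selector of { 4, 5, 6, 7 } has WHICH == 2 and is folded
     to { 0, 1, 2, 3 } applied to the first operand.  */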

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;
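
  /* E.g. for V4HImode with ODD == 0 this requests { 0, 2, 4, 6 } from the
     concatenation of OP0 == { a0, a1, a2, a3 } and OP1 == { b0, b1, b2, b3 },
     i.e. { a0, a2, b0, b2 }.  */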

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"