1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2016 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
33 #include "stringpool.h"
38 #include "diagnostic-core.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
45 #include "insn-attr.h"
51 #include "sched-int.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
59 #include "tm-constrs.h"
60 #include "sel-sched.h"
66 /* This file should be included last. */
67 #include "target-def.h"
69 /* This is used for communication between ASM_OUTPUT_LABEL and
70 ASM_OUTPUT_LABELREF. */
/* Nonzero while a code label is being output, so LABELREF can tell.  */
int ia64_asm_output_label
= 0;
73 /* Register names for ia64_expand_prologue. */
/* Names of the 96 stacked general registers r32..r127; index 0 == "r32".  */
74 static const char * const ia64_reg_numbers
[96] =
75 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
76 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
77 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
78 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
79 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
80 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
81 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
82 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
83 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
84 "r104","r105","r106","r107","r108","r109","r110","r111",
85 "r112","r113","r114","r115","r116","r117","r118","r119",
86 "r120","r121","r122","r123","r124","r125","r126","r127"};
88 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Names for the 8 incoming-argument registers in0..in7.  */
89 static const char * const ia64_input_reg_names
[8] =
90 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
92 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Names for the 80 local stacked registers loc0..loc79.  */
93 static const char * const ia64_local_reg_names
[80] =
94 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
95 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
96 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
97 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
98 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
99 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
100 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
101 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
102 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
103 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
105 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Names for the 8 outgoing-argument registers out0..out7.  */
106 static const char * const ia64_output_reg_names
[8] =
107 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
109 /* Variables which are this size or smaller are put in the sdata/sbss
   sections.  (NOTE(review): the original comment was unterminated here;
   closing it so the declaration below is not swallowed.)  */
112 unsigned int ia64_section_threshold
;
114 /* The following variable is used by the DFA insn scheduler. The value is
115 TRUE if we do insn bundling instead of insn scheduling. */
127 number_of_ia64_frame_regs
130 /* Structure to be filled in by ia64_compute_frame_size with register
131 save masks and offsets for the current function. */
133 struct ia64_frame_info
135 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
136 the caller's scratch area. */
137 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
138 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
139 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
140 HARD_REG_SET mask
; /* mask of saved registers. */
141 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
142 registers or long-term scratches. */
143 int n_spilled
; /* number of spilled registers. */
144 int r
[number_of_ia64_frame_regs
]; /* Frame related registers. */
145 int n_input_regs
; /* number of input registers used. */
146 int n_local_regs
; /* number of local registers used. */
147 int n_output_regs
; /* number of output registers used. */
148 int n_rotate_regs
; /* number of rotating registers used. */
150 char need_regstk
; /* true if a .regstk directive needed. */
151 char initialized
; /* true if the data is finalized. */
154 /* Current frame information calculated by ia64_compute_frame_size. */
/* Valid only once the struct's 'initialized' flag has been set.  */
155 static struct ia64_frame_info current_frame_info
;
156 /* The actual registers that are emitted. */
/* Presumably indexed by enum ia64_frame_regs, parallel to
   current_frame_info.r[] — TODO confirm against the enum definition.  */
157 static int emitted_frame_related_regs
[number_of_ia64_frame_regs
];
159 static int ia64_first_cycle_multipass_dfa_lookahead (void);
160 static void ia64_dependencies_evaluation_hook (rtx_insn
*, rtx_insn
*);
161 static void ia64_init_dfa_pre_cycle_insn (void);
162 static rtx
ia64_dfa_pre_cycle_insn (void);
163 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
164 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn
*, int, int, int *);
165 static void ia64_h_i_d_extended (void);
166 static void * ia64_alloc_sched_context (void);
167 static void ia64_init_sched_context (void *, bool);
168 static void ia64_set_sched_context (void *);
169 static void ia64_clear_sched_context (void *);
170 static void ia64_free_sched_context (void *);
171 static int ia64_mode_to_int (machine_mode
);
172 static void ia64_set_sched_flags (spec_info_t
);
173 static ds_t
ia64_get_insn_spec_ds (rtx_insn
*);
174 static ds_t
ia64_get_insn_checked_ds (rtx_insn
*);
175 static bool ia64_skip_rtx_p (const_rtx
);
176 static int ia64_speculate_insn (rtx_insn
*, ds_t
, rtx
*);
177 static bool ia64_needs_block_p (ds_t
);
178 static rtx
ia64_gen_spec_check (rtx_insn
*, rtx_insn
*, ds_t
);
179 static int ia64_spec_check_p (rtx
);
180 static int ia64_spec_check_src_p (rtx
);
181 static rtx
gen_tls_get_addr (void);
182 static rtx
gen_thread_pointer (void);
183 static int find_gr_spill (enum ia64_frame_regs
, int);
184 static int next_scratch_gr_reg (void);
185 static void mark_reg_gr_used_mask (rtx
, void *);
186 static void ia64_compute_frame_size (HOST_WIDE_INT
);
187 static void setup_spill_pointers (int, rtx
, HOST_WIDE_INT
);
188 static void finish_spill_pointers (void);
189 static rtx
spill_restore_mem (rtx
, HOST_WIDE_INT
);
190 static void do_spill (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
);
191 static void do_restore (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
);
192 static rtx
gen_movdi_x (rtx
, rtx
, rtx
);
193 static rtx
gen_fr_spill_x (rtx
, rtx
, rtx
);
194 static rtx
gen_fr_restore_x (rtx
, rtx
, rtx
);
196 static void ia64_option_override (void);
197 static bool ia64_can_eliminate (const int, const int);
198 static machine_mode
hfa_element_mode (const_tree
, bool);
199 static void ia64_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
201 static int ia64_arg_partial_bytes (cumulative_args_t
, machine_mode
,
203 static rtx
ia64_function_arg_1 (cumulative_args_t
, machine_mode
,
204 const_tree
, bool, bool);
205 static rtx
ia64_function_arg (cumulative_args_t
, machine_mode
,
207 static rtx
ia64_function_incoming_arg (cumulative_args_t
,
208 machine_mode
, const_tree
, bool);
209 static void ia64_function_arg_advance (cumulative_args_t
, machine_mode
,
211 static unsigned int ia64_function_arg_boundary (machine_mode
,
213 static bool ia64_function_ok_for_sibcall (tree
, tree
);
214 static bool ia64_return_in_memory (const_tree
, const_tree
);
215 static rtx
ia64_function_value (const_tree
, const_tree
, bool);
216 static rtx
ia64_libcall_value (machine_mode
, const_rtx
);
217 static bool ia64_function_value_regno_p (const unsigned int);
218 static int ia64_register_move_cost (machine_mode
, reg_class_t
,
220 static int ia64_memory_move_cost (machine_mode mode
, reg_class_t
,
222 static bool ia64_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
223 static int ia64_unspec_may_trap_p (const_rtx
, unsigned);
224 static void fix_range (const char *);
225 static struct machine_function
* ia64_init_machine_status (void);
226 static void emit_insn_group_barriers (FILE *);
227 static void emit_all_insn_group_barriers (FILE *);
228 static void final_emit_insn_group_barriers (FILE *);
229 static void emit_predicate_relation_info (void);
230 static void ia64_reorg (void);
231 static bool ia64_in_small_data_p (const_tree
);
232 static void process_epilogue (FILE *, rtx
, bool, bool);
234 static bool ia64_assemble_integer (rtx
, unsigned int, int);
235 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT
);
236 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT
);
237 static void ia64_output_function_end_prologue (FILE *);
239 static void ia64_print_operand (FILE *, rtx
, int);
240 static void ia64_print_operand_address (FILE *, machine_mode
, rtx
);
241 static bool ia64_print_operand_punct_valid_p (unsigned char code
);
243 static int ia64_issue_rate (void);
244 static int ia64_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, dw_t
);
245 static void ia64_sched_init (FILE *, int, int);
246 static void ia64_sched_init_global (FILE *, int, int);
247 static void ia64_sched_finish_global (FILE *, int);
248 static void ia64_sched_finish (FILE *, int);
249 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn
**, int *, int, int);
250 static int ia64_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
251 static int ia64_sched_reorder2 (FILE *, int, rtx_insn
**, int *, int);
252 static int ia64_variable_issue (FILE *, int, rtx_insn
*, int);
254 static void ia64_asm_unwind_emit (FILE *, rtx_insn
*);
255 static void ia64_asm_emit_except_personality (rtx
);
256 static void ia64_asm_init_sections (void);
258 static enum unwind_info_type
ia64_debug_unwind_info (void);
260 static struct bundle_state
*get_free_bundle_state (void);
261 static void free_bundle_state (struct bundle_state
*);
262 static void initiate_bundle_states (void);
263 static void finish_bundle_states (void);
264 static int insert_bundle_state (struct bundle_state
*);
265 static void initiate_bundle_state_table (void);
266 static void finish_bundle_state_table (void);
267 static int try_issue_nops (struct bundle_state
*, int);
268 static int try_issue_insn (struct bundle_state
*, rtx
);
269 static void issue_nops_and_insn (struct bundle_state
*, int, rtx_insn
*,
271 static int get_max_pos (state_t
);
272 static int get_template (state_t
, int);
274 static rtx_insn
*get_next_important_insn (rtx_insn
*, rtx_insn
*);
275 static bool important_for_bundling_p (rtx_insn
*);
276 static bool unknown_for_bundling_p (rtx_insn
*);
277 static void bundling (FILE *, int, rtx_insn
*, rtx_insn
*);
279 static void ia64_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
280 HOST_WIDE_INT
, tree
);
281 static void ia64_file_start (void);
282 static void ia64_globalize_decl_name (FILE *, tree
);
284 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
285 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED
;
286 static section
*ia64_select_rtx_section (machine_mode
, rtx
,
287 unsigned HOST_WIDE_INT
);
288 static void ia64_output_dwarf_dtprel (FILE *, int, rtx
)
290 static unsigned int ia64_section_type_flags (tree
, const char *, int);
291 static void ia64_init_libfuncs (void)
293 static void ia64_hpux_init_libfuncs (void)
295 static void ia64_sysv4_init_libfuncs (void)
297 static void ia64_vms_init_libfuncs (void)
299 static void ia64_soft_fp_init_libfuncs (void)
301 static bool ia64_vms_valid_pointer_mode (machine_mode mode
)
303 static tree
ia64_vms_common_object_attribute (tree
*, tree
, tree
, int, bool *)
306 static bool ia64_attribute_takes_identifier_p (const_tree
);
307 static tree
ia64_handle_model_attribute (tree
*, tree
, tree
, int, bool *);
308 static tree
ia64_handle_version_id_attribute (tree
*, tree
, tree
, int, bool *);
309 static void ia64_encode_section_info (tree
, rtx
, int);
310 static rtx
ia64_struct_value_rtx (tree
, int);
311 static tree
ia64_gimplify_va_arg (tree
, tree
, gimple_seq
*, gimple_seq
*);
312 static bool ia64_scalar_mode_supported_p (machine_mode mode
);
313 static bool ia64_vector_mode_supported_p (machine_mode mode
);
314 static bool ia64_legitimate_constant_p (machine_mode
, rtx
);
315 static bool ia64_legitimate_address_p (machine_mode
, rtx
, bool);
316 static bool ia64_cannot_force_const_mem (machine_mode
, rtx
);
317 static const char *ia64_mangle_type (const_tree
);
318 static const char *ia64_invalid_conversion (const_tree
, const_tree
);
319 static const char *ia64_invalid_unary_op (int, const_tree
);
320 static const char *ia64_invalid_binary_op (int, const_tree
, const_tree
);
321 static machine_mode
ia64_c_mode_for_suffix (char);
322 static void ia64_trampoline_init (rtx
, tree
, rtx
);
323 static void ia64_override_options_after_change (void);
324 static bool ia64_member_type_forces_blk (const_tree
, machine_mode
);
326 static tree
ia64_fold_builtin (tree
, int, tree
*, bool);
327 static tree
ia64_builtin_decl (unsigned, bool);
329 static reg_class_t
ia64_preferred_reload_class (rtx
, reg_class_t
);
330 static machine_mode
ia64_get_reg_raw_mode (int regno
);
331 static section
* ia64_hpux_function_section (tree
, enum node_frequency
,
334 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode
,
335 const unsigned char *sel
);
337 #define MAX_VECT_LEN 8
339 struct expand_vec_perm_d
341 rtx target
, op0
, op1
;
342 unsigned char perm
[MAX_VECT_LEN
];
349 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
);
352 /* Table of valid machine attributes. */
353 static const struct attribute_spec ia64_attribute_table
[] =
355 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
356 affects_type_identity } */
357 { "syscall_linkage", 0, 0, false, true, true, NULL
, false },
358 { "model", 1, 1, true, false, false, ia64_handle_model_attribute
,
360 #if TARGET_ABI_OPEN_VMS
361 { "common_object", 1, 1, true, false, false,
362 ia64_vms_common_object_attribute
, false },
364 { "version_id", 1, 1, true, false, false,
365 ia64_handle_version_id_attribute
, false },
366 { NULL
, 0, 0, false, false, false, NULL
, false }
369 /* Initialize the GCC target structure. */
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
373 #undef TARGET_INIT_BUILTINS
374 #define TARGET_INIT_BUILTINS ia64_init_builtins
376 #undef TARGET_FOLD_BUILTIN
377 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
379 #undef TARGET_EXPAND_BUILTIN
380 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
382 #undef TARGET_BUILTIN_DECL
383 #define TARGET_BUILTIN_DECL ia64_builtin_decl
385 #undef TARGET_ASM_BYTE_OP
386 #define TARGET_ASM_BYTE_OP "\tdata1\t"
387 #undef TARGET_ASM_ALIGNED_HI_OP
388 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
389 #undef TARGET_ASM_ALIGNED_SI_OP
390 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
391 #undef TARGET_ASM_ALIGNED_DI_OP
392 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
393 #undef TARGET_ASM_UNALIGNED_HI_OP
394 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
395 #undef TARGET_ASM_UNALIGNED_SI_OP
396 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
397 #undef TARGET_ASM_UNALIGNED_DI_OP
398 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
399 #undef TARGET_ASM_INTEGER
400 #define TARGET_ASM_INTEGER ia64_assemble_integer
402 #undef TARGET_OPTION_OVERRIDE
403 #define TARGET_OPTION_OVERRIDE ia64_option_override
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
407 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
408 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND ia64_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
419 #undef TARGET_IN_SMALL_DATA_P
420 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
422 #undef TARGET_SCHED_ADJUST_COST
423 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
424 #undef TARGET_SCHED_ISSUE_RATE
425 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
426 #undef TARGET_SCHED_VARIABLE_ISSUE
427 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
428 #undef TARGET_SCHED_INIT
429 #define TARGET_SCHED_INIT ia64_sched_init
430 #undef TARGET_SCHED_FINISH
431 #define TARGET_SCHED_FINISH ia64_sched_finish
432 #undef TARGET_SCHED_INIT_GLOBAL
433 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
434 #undef TARGET_SCHED_FINISH_GLOBAL
435 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
436 #undef TARGET_SCHED_REORDER
437 #define TARGET_SCHED_REORDER ia64_sched_reorder
438 #undef TARGET_SCHED_REORDER2
439 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
441 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
442 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
444 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
445 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
447 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
448 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
449 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
450 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
452 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
453 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
454 ia64_first_cycle_multipass_dfa_lookahead_guard
456 #undef TARGET_SCHED_DFA_NEW_CYCLE
457 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
459 #undef TARGET_SCHED_H_I_D_EXTENDED
460 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
462 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
463 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
465 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
466 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
468 #undef TARGET_SCHED_SET_SCHED_CONTEXT
469 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
471 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
472 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
474 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
475 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
477 #undef TARGET_SCHED_SET_SCHED_FLAGS
478 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
480 #undef TARGET_SCHED_GET_INSN_SPEC_DS
481 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
483 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
484 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
486 #undef TARGET_SCHED_SPECULATE_INSN
487 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
489 #undef TARGET_SCHED_NEEDS_BLOCK_P
490 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
492 #undef TARGET_SCHED_GEN_SPEC_CHECK
493 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
495 #undef TARGET_SCHED_SKIP_RTX_P
496 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
498 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
499 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
500 #undef TARGET_ARG_PARTIAL_BYTES
501 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
502 #undef TARGET_FUNCTION_ARG
503 #define TARGET_FUNCTION_ARG ia64_function_arg
504 #undef TARGET_FUNCTION_INCOMING_ARG
505 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
506 #undef TARGET_FUNCTION_ARG_ADVANCE
507 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
508 #undef TARGET_FUNCTION_ARG_BOUNDARY
509 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
511 #undef TARGET_ASM_OUTPUT_MI_THUNK
512 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
516 #undef TARGET_ASM_FILE_START
517 #define TARGET_ASM_FILE_START ia64_file_start
519 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
520 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
522 #undef TARGET_REGISTER_MOVE_COST
523 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
524 #undef TARGET_MEMORY_MOVE_COST
525 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
526 #undef TARGET_RTX_COSTS
527 #define TARGET_RTX_COSTS ia64_rtx_costs
528 #undef TARGET_ADDRESS_COST
529 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
531 #undef TARGET_UNSPEC_MAY_TRAP_P
532 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
534 #undef TARGET_MACHINE_DEPENDENT_REORG
535 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
537 #undef TARGET_ENCODE_SECTION_INFO
538 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
540 #undef TARGET_SECTION_TYPE_FLAGS
541 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
548 /* ??? Investigate. */
550 #undef TARGET_PROMOTE_PROTOTYPES
551 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
554 #undef TARGET_FUNCTION_VALUE
555 #define TARGET_FUNCTION_VALUE ia64_function_value
556 #undef TARGET_LIBCALL_VALUE
557 #define TARGET_LIBCALL_VALUE ia64_libcall_value
558 #undef TARGET_FUNCTION_VALUE_REGNO_P
559 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
561 #undef TARGET_STRUCT_VALUE_RTX
562 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
563 #undef TARGET_RETURN_IN_MEMORY
564 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
565 #undef TARGET_SETUP_INCOMING_VARARGS
566 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
567 #undef TARGET_STRICT_ARGUMENT_NAMING
568 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
571 #undef TARGET_GET_RAW_RESULT_MODE
572 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
573 #undef TARGET_GET_RAW_ARG_MODE
574 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
576 #undef TARGET_MEMBER_TYPE_FORCES_BLK
577 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
579 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
580 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
582 #undef TARGET_ASM_UNWIND_EMIT
583 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
584 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
585 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
589 #undef TARGET_DEBUG_UNWIND_INFO
590 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
592 #undef TARGET_SCALAR_MODE_SUPPORTED_P
593 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
594 #undef TARGET_VECTOR_MODE_SUPPORTED_P
595 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
597 #undef TARGET_LEGITIMATE_CONSTANT_P
598 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
603 #define TARGET_LRA_P hook_bool_void_false
605 #undef TARGET_CANNOT_FORCE_CONST_MEM
606 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
608 #undef TARGET_MANGLE_TYPE
609 #define TARGET_MANGLE_TYPE ia64_mangle_type
611 #undef TARGET_INVALID_CONVERSION
612 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
613 #undef TARGET_INVALID_UNARY_OP
614 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
615 #undef TARGET_INVALID_BINARY_OP
616 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
618 #undef TARGET_C_MODE_FOR_SUFFIX
619 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
621 #undef TARGET_CAN_ELIMINATE
622 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
624 #undef TARGET_TRAMPOLINE_INIT
625 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
627 #undef TARGET_CAN_USE_DOLOOP_P
628 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
629 #undef TARGET_INVALID_WITHIN_DOLOOP
630 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
632 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
633 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
635 #undef TARGET_PREFERRED_RELOAD_CLASS
636 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
638 #undef TARGET_DELAY_SCHED2
639 #define TARGET_DELAY_SCHED2 true
641 /* Variable tracking should be run after all optimizations which
642 change order of insns. It also needs a valid CFG. */
643 #undef TARGET_DELAY_VARTRACK
644 #define TARGET_DELAY_VARTRACK true
646 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
647 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
649 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
650 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
652 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
653 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
/* The target hook vector; TARGET_INITIALIZER (from target-def.h) picks up
   all of the TARGET_* macro overrides defined above.  */
655 struct gcc_target targetm
= TARGET_INITIALIZER
;
657 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
658 identifier as an argument, so the front end shouldn't look it up. */
661 ia64_attribute_takes_identifier_p (const_tree attr_id
)
663 if (is_attribute_p ("model", attr_id
))
665 #if TARGET_ABI_OPEN_VMS
666 if (is_attribute_p ("common_object", attr_id
))
674 ADDR_AREA_NORMAL
, /* normal address area */
675 ADDR_AREA_SMALL
/* addressable by "addl" (-2MB < addr < 2MB) */
679 static GTY(()) tree small_ident1
;
680 static GTY(()) tree small_ident2
;
685 if (small_ident1
== 0)
687 small_ident1
= get_identifier ("small");
688 small_ident2
= get_identifier ("__small__");
692 /* Retrieve the address area that has been chosen for the given decl. */
694 static ia64_addr_area
695 ia64_get_addr_area (tree decl
)
699 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
705 id
= TREE_VALUE (TREE_VALUE (model_attr
));
706 if (id
== small_ident1
|| id
== small_ident2
)
707 return ADDR_AREA_SMALL
;
709 return ADDR_AREA_NORMAL
;
713 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
,
714 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
716 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
718 tree arg
, decl
= *node
;
721 arg
= TREE_VALUE (args
);
722 if (arg
== small_ident1
|| arg
== small_ident2
)
724 addr_area
= ADDR_AREA_SMALL
;
728 warning (OPT_Wattributes
, "invalid argument of %qE attribute",
730 *no_add_attrs
= true;
733 switch (TREE_CODE (decl
))
736 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
738 && !TREE_STATIC (decl
))
740 error_at (DECL_SOURCE_LOCATION (decl
),
741 "an address area attribute cannot be specified for "
743 *no_add_attrs
= true;
745 area
= ia64_get_addr_area (decl
);
746 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
748 error ("address area of %q+D conflicts with previous "
749 "declaration", decl
);
750 *no_add_attrs
= true;
755 error_at (DECL_SOURCE_LOCATION (decl
),
756 "address area attribute cannot be specified for "
758 *no_add_attrs
= true;
762 warning (OPT_Wattributes
, "%qE attribute ignored",
764 *no_add_attrs
= true;
771 /* Part of the low level implementation of DEC Ada pragma Common_Object which
772 enables the shared use of variables stored in overlaid linker areas
773 corresponding to the use of Fortran COMMON. */
776 ia64_vms_common_object_attribute (tree
*node
, tree name
, tree args
,
777 int flags ATTRIBUTE_UNUSED
,
783 gcc_assert (DECL_P (decl
));
785 DECL_COMMON (decl
) = 1;
786 id
= TREE_VALUE (args
);
787 if (TREE_CODE (id
) != IDENTIFIER_NODE
&& TREE_CODE (id
) != STRING_CST
)
789 error ("%qE attribute requires a string constant argument", name
);
790 *no_add_attrs
= true;
796 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
799 ia64_vms_output_aligned_decl_common (FILE *file
, tree decl
, const char *name
,
800 unsigned HOST_WIDE_INT size
,
803 tree attr
= DECL_ATTRIBUTES (decl
);
806 attr
= lookup_attribute ("common_object", attr
);
809 tree id
= TREE_VALUE (TREE_VALUE (attr
));
812 if (TREE_CODE (id
) == IDENTIFIER_NODE
)
813 name
= IDENTIFIER_POINTER (id
);
814 else if (TREE_CODE (id
) == STRING_CST
)
815 name
= TREE_STRING_POINTER (id
);
819 fprintf (file
, "\t.vms_common\t\"%s\",", name
);
822 fprintf (file
, "%s", COMMON_ASM_OP
);
824 /* Code from elfos.h. */
825 assemble_name (file
, name
);
826 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u",
827 size
, align
/ BITS_PER_UNIT
);
833 ia64_encode_addr_area (tree decl
, rtx symbol
)
837 flags
= SYMBOL_REF_FLAGS (symbol
);
838 switch (ia64_get_addr_area (decl
))
840 case ADDR_AREA_NORMAL
: break;
841 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
842 default: gcc_unreachable ();
844 SYMBOL_REF_FLAGS (symbol
) = flags
;
848 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
850 default_encode_section_info (decl
, rtl
, first
);
852 /* Careful not to prod global register variables. */
853 if (TREE_CODE (decl
) == VAR_DECL
854 && GET_CODE (DECL_RTL (decl
)) == MEM
855 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
856 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
857 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
860 /* Return 1 if the operands of a move are ok. */
863 ia64_move_ok (rtx dst
, rtx src
)
865 /* If we're under init_recog_no_volatile, we'll not be able to use
866 memory_operand. So check the code directly and don't worry about
867 the validity of the underlying address, which should have been
868 checked elsewhere anyway. */
869 if (GET_CODE (dst
) != MEM
)
871 if (GET_CODE (src
) == MEM
)
873 if (register_operand (src
, VOIDmode
))
876 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
877 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
878 return src
== const0_rtx
;
880 return satisfies_constraint_G (src
);
883 /* Return 1 if the operands are ok for a floating point load pair. */
886 ia64_load_pair_ok (rtx dst
, rtx src
)
888 /* ??? There is a thinko in the implementation of the "x" constraint and the
889 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
890 also return false for it. */
891 if (GET_CODE (dst
) != REG
892 || !(FP_REGNO_P (REGNO (dst
)) && FP_REGNO_P (REGNO (dst
) + 1)))
894 if (GET_CODE (src
) != MEM
|| MEM_VOLATILE_P (src
))
896 switch (GET_CODE (XEXP (src
, 0)))
905 rtx adjust
= XEXP (XEXP (XEXP (src
, 0), 1), 1);
907 if (GET_CODE (adjust
) != CONST_INT
908 || INTVAL (adjust
) != GET_MODE_SIZE (GET_MODE (src
)))
919 addp4_optimize_ok (rtx op1
, rtx op2
)
921 return (basereg_operand (op1
, GET_MODE(op1
)) !=
922 basereg_operand (op2
, GET_MODE(op2
)));
925 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
926 Return the length of the field, or <= 0 on failure. */
929 ia64_depz_field_mask (rtx rop
, rtx rshift
)
931 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
932 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
934 /* Get rid of the zero bits we're shifting in. */
937 /* We must now have a solid block of 1's at bit 0. */
938 return exact_log2 (op
+ 1);
941 /* Return the TLS model to use for ADDR. */
/* Handles both a bare SYMBOL_REF and the wrapped form
   (const (plus (symbol_ref ...) (const_int ...))); anything else yields
   TLS_MODEL_NONE.  */
943 static enum tls_model
944 tls_symbolic_operand_type (rtx addr
)
946 enum tls_model tls_kind
= TLS_MODEL_NONE
;
948 if (GET_CODE (addr
) == CONST
)
950 if (GET_CODE (XEXP (addr
, 0)) == PLUS
951 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
)
952 tls_kind
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr
, 0), 0));
954 else if (GET_CODE (addr
) == SYMBOL_REF
)
955 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
960 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
961 as a base register. */
/* One arm checks REGNO_OK_FOR_BASE_P (the strict hard-register test);
   the other admits general registers or any pseudo (!HARD_REGISTER_P).
   NOTE(review): the `if (strict ...)` / `else` framing lines are elided
   in this extract — verify which arm corresponds to STRICT upstream.  */
964 ia64_reg_ok_for_base_p (const_rtx reg
, bool strict
)
967 && REGNO_OK_FOR_BASE_P (REGNO (reg
)))
970 && (GENERAL_REGNO_P (REGNO (reg
))
971 || !HARD_REGISTER_P (reg
)))
/* True if REG — a plain REG, or a SUBREG of a REG — is acceptable as an
   address base register under the STRICT/non-strict rules of
   ia64_reg_ok_for_base_p.  */
978 ia64_legitimate_address_reg (const_rtx reg
, bool strict
)
980 if ((REG_P (reg
) && ia64_reg_ok_for_base_p (reg
, strict
))
981 || (GET_CODE (reg
) == SUBREG
&& REG_P (XEXP (reg
, 0))
982 && ia64_reg_ok_for_base_p (XEXP (reg
, 0), strict
)))
/* True if DISP is a legitimate POST_MODIFY expression for base REG:
   (plus REG X) where X is either another legitimate base register or a
   CONST_INT in the signed 9-bit range [-256, 255].  */
989 ia64_legitimate_address_disp (const_rtx reg
, const_rtx disp
, bool strict
)
991 if (GET_CODE (disp
) == PLUS
992 && rtx_equal_p (reg
, XEXP (disp
, 0))
993 && (ia64_legitimate_address_reg (XEXP (disp
, 1), strict
)
994 || (CONST_INT_P (XEXP (disp
, 1))
995 && IN_RANGE (INTVAL (XEXP (disp
, 1)), -256, 255))))
1001 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
/* Accepted address forms: a base register; POST_INC/POST_DEC of a base
   register; or POST_MODIFY of a base register with a legitimate
   displacement.  The arg pointer is excluded as the base of the
   auto-increment forms.  */
1004 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED
,
1007 if (ia64_legitimate_address_reg (x
, strict
))
1009 else if ((GET_CODE (x
) == POST_INC
|| GET_CODE (x
) == POST_DEC
)
1010 && ia64_legitimate_address_reg (XEXP (x
, 0), strict
)
1011 && XEXP (x
, 0) != arg_pointer_rtx
)
1013 else if (GET_CODE (x
) == POST_MODIFY
1014 && ia64_legitimate_address_reg (XEXP (x
, 0), strict
)
1015 && XEXP (x
, 0) != arg_pointer_rtx
1016 && ia64_legitimate_address_disp (XEXP (x
, 0), XEXP (x
, 1), strict
))
1022 /* Return true if X is a constant that is valid for some immediate
1023 field in an instruction. */
1026 ia64_legitimate_constant_p (machine_mode mode
, rtx x
)
1028 switch (GET_CODE (x
))
/* Floating constants: only 0.0/1.0 style immediates (constraint G).  */
1035 if (GET_MODE (x
) == VOIDmode
|| mode
== SFmode
|| mode
== DFmode
)
1037 return satisfies_constraint_G (x
);
1041 /* ??? Short term workaround for PR 28490. We must make the code here
1042 match the code in ia64_expand_move and move_operand, even though they
1043 are both technically wrong. */
1044 if (tls_symbolic_operand_type (x
) == 0)
1046 HOST_WIDE_INT addend
= 0;
/* Peel a (const (plus sym const_int)) wrapper into sym + addend.  */
1049 if (GET_CODE (op
) == CONST
1050 && GET_CODE (XEXP (op
, 0)) == PLUS
1051 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
1053 addend
= INTVAL (XEXP (XEXP (op
, 0), 1));
1054 op
= XEXP (XEXP (op
, 0), 0);
1057 if (any_offset_symbol_operand (op
, mode
)
1058 || function_operand (op
, mode
))
/* Aligned-offset symbols admit only addends with the low 14 bits clear —
   presumably matching the 14-bit gprel/ltoff addend field; confirm.  */
1060 if (aligned_offset_symbol_operand (op
, mode
))
1061 return (addend
& 0x3fff) == 0;
1067 if (mode
== V2SFmode
)
1068 return satisfies_constraint_Y (x
);
/* Other vector constants: only integer vectors no wider than 64 bits.  */
1070 return (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1071 && GET_MODE_SIZE (mode
) <= 8);
1078 /* Don't allow TLS addresses to get spilled to memory. */
/* NOTE(review): presumably implements TARGET_CANNOT_FORCE_CONST_MEM —
   confirm against the hook registration outside this extract.  */
1081 ia64_cannot_force_const_mem (machine_mode mode
, rtx x
)
1085 return tls_symbolic_operand_type (x
) != 0;
1088 /* Expand a symbolic constant load. */
/* DEST must be a REG.  Picks the cheapest addressing form for SRC:
   gprel64 / fptr / gprel for small, function, and sdata symbols, gprel64
   for 64-bit local symbols, otherwise a HIGH/LO_SUM GOT load with any
   constant addend split off and re-added at the end.
   NOTE(review): some lines (returns, the addend capture around original
   line 1139) are elided in this extract.  */
1091 ia64_expand_load_address (rtx dest
, rtx src
)
1093 gcc_assert (GET_CODE (dest
) == REG
);
1095 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1096 having to pointer-extend the value afterward. Other forms of address
1097 computation below are also more natural to compute as 64-bit quantities.
1098 If we've been given an SImode destination register, change it. */
1099 if (GET_MODE (dest
) != Pmode
)
1100 dest
= gen_rtx_REG_offset (dest
, Pmode
, REGNO (dest
),
1101 byte_lowpart_offset (Pmode
, GET_MODE (dest
)));
1105 if (small_addr_symbolic_operand (src
, VOIDmode
))
1108 if (TARGET_AUTO_PIC
)
1109 emit_insn (gen_load_gprel64 (dest
, src
));
1110 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
1111 emit_insn (gen_load_fptr (dest
, src
));
1112 else if (sdata_symbolic_operand (src
, VOIDmode
))
1113 emit_insn (gen_load_gprel (dest
, src
));
1114 else if (local_symbolic_operand64 (src
, VOIDmode
))
1116 /* We want to use @gprel rather than @ltoff relocations for local
1118 - @gprel does not require dynamic linker
1119 - and does not use .sdata section
1120 https://gcc.gnu.org/bugzilla/60465 */
1121 emit_insn (gen_load_gprel64 (dest
, src
));
1125 HOST_WIDE_INT addend
= 0;
1128 /* We did split constant offsets in ia64_expand_move, and we did try
1129 to keep them split in move_operand, but we also allowed reload to
1130 rematerialize arbitrary constants rather than spill the value to
1131 the stack and reload it. So we have to be prepared here to split
1132 them apart again. */
1133 if (GET_CODE (src
) == CONST
)
1135 HOST_WIDE_INT hi
, lo
;
/* Split the offset: lo is the sign-extended low 14 bits, hi the rest.  */
1137 hi
= INTVAL (XEXP (XEXP (src
, 0), 1));
1138 lo
= ((hi
& 0x3fff) ^ 0x2000) - 0x2000;
1144 src
= plus_constant (Pmode
, XEXP (XEXP (src
, 0), 0), hi
);
/* GOT-relative load: dest = @ltoff(src) + gp; dest = [dest].  */
1148 tmp
= gen_rtx_HIGH (Pmode
, src
);
1149 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
1150 emit_insn (gen_rtx_SET (dest
, tmp
));
1152 tmp
= gen_rtx_LO_SUM (Pmode
, gen_const_mem (Pmode
, dest
), src
);
1153 emit_insn (gen_rtx_SET (dest
, tmp
));
/* Re-apply any addend that was split off above.  */
1157 tmp
= gen_rtx_PLUS (Pmode
, dest
, GEN_INT (addend
));
1158 emit_insn (gen_rtx_SET (dest
, tmp
));
/* Cached rtx for the __tls_get_addr libfunc (GC-rooted).  */
1165 static GTY(()) rtx gen_tls_tga
;
/* Lazily create and return the "__tls_get_addr" libfunc rtx.  */
1167 gen_tls_get_addr (void)
1170 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
/* Cached rtx for the thread pointer register (GC-rooted).  */
1174 static GTY(()) rtx thread_pointer_rtx
;
/* Return (creating and caching on first use) the thread pointer,
   register r13, in Pmode.  */
1176 gen_thread_pointer (void)
1178 if (!thread_pointer_rtx
)
1179 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1180 return thread_pointer_rtx
;
/* Expand a TLS reference: load the address of symbol OP1 (+ ADDEND) into
   OP0 according to TLS_KIND.  ORIG_OP1 is the original (possibly offset)
   operand.  Global/local-dynamic models call __tls_get_addr; initial/local
   exec use tprel relocations plus the thread pointer.
   NOTE(review): braces, returns and some statements are elided in this
   extract; verify control flow against upstream ia64.c.  */
1184 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
,
1185 rtx orig_op1
, HOST_WIDE_INT addend
)
1187 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
;
1190 HOST_WIDE_INT addend_lo
, addend_hi
;
1194 case TLS_MODEL_GLOBAL_DYNAMIC
:
/* GD: pass @dtpmod and @dtprel of the symbol to __tls_get_addr.  */
1197 tga_op1
= gen_reg_rtx (Pmode
);
1198 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1200 tga_op2
= gen_reg_rtx (Pmode
);
1201 emit_insn (gen_load_dtprel (tga_op2
, op1
));
1203 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1204 LCT_CONST
, Pmode
, 2, tga_op1
,
1205 Pmode
, tga_op2
, Pmode
);
1207 insns
= get_insns ();
1210 if (GET_MODE (op0
) != Pmode
)
1212 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
1215 case TLS_MODEL_LOCAL_DYNAMIC
:
1216 /* ??? This isn't the completely proper way to do local-dynamic
1217 If the call to __tls_get_addr is used only by a single symbol,
1218 then we should (somehow) move the dtprel to the second arg
1219 to avoid the extra add. */
/* LD: call __tls_get_addr with dtpmod and offset 0, then add @dtprel.  */
1222 tga_op1
= gen_reg_rtx (Pmode
);
1223 emit_insn (gen_load_dtpmod (tga_op1
, op1
));
1225 tga_op2
= const0_rtx
;
1227 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1228 LCT_CONST
, Pmode
, 2, tga_op1
,
1229 Pmode
, tga_op2
, Pmode
);
1231 insns
= get_insns ();
/* Use an UNSPEC so all LD blocks in the function can be CSEd.  */
1234 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1236 tmp
= gen_reg_rtx (Pmode
);
1237 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
1239 if (!register_operand (op0
, Pmode
))
1240 op0
= gen_reg_rtx (Pmode
);
1243 emit_insn (gen_load_dtprel (op0
, op1
));
1244 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
1247 emit_insn (gen_add_tprel (op0
, op1
, tmp
));
1250 case TLS_MODEL_INITIAL_EXEC
:
/* IE: fold the high part of the addend into the relocation; lo is the
   sign-extended low 14 bits.  */
1251 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1252 addend_hi
= addend
- addend_lo
;
1254 op1
= plus_constant (Pmode
, op1
, addend_hi
);
1257 tmp
= gen_reg_rtx (Pmode
);
1258 emit_insn (gen_load_tprel (tmp
, op1
));
1260 if (!register_operand (op0
, Pmode
))
1261 op0
= gen_reg_rtx (Pmode
);
1262 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
1265 case TLS_MODEL_LOCAL_EXEC
:
1266 if (!register_operand (op0
, Pmode
))
1267 op0
= gen_reg_rtx (Pmode
);
1273 emit_insn (gen_load_tprel (op0
, op1
));
1274 emit_insn (gen_adddi3 (op0
, op0
, gen_thread_pointer ()));
1277 emit_insn (gen_add_tprel (op0
, op1
, gen_thread_pointer ()));
/* Apply any remaining addend, then convert back to the caller's mode.  */
1285 op0
= expand_simple_binop (Pmode
, PLUS
, op0
, GEN_INT (addend
),
1286 orig_op0
, 1, OPTAB_DIRECT
);
1287 if (orig_op0
== op0
)
1289 if (GET_MODE (orig_op0
) == Pmode
)
1291 return gen_lowpart (GET_MODE (orig_op0
), op0
);
/* Expand a move OP0 <- OP1, legitimizing symbolic and TLS sources.
   Forces OP1 into a register when the plain move is not OK; splits
   constant offsets off symbolic operands.  NOTE(review): the returns and
   several branch bodies are elided in this extract — confirm the exact
   return contract against upstream.  */
1295 ia64_expand_move (rtx op0
, rtx op1
)
1297 machine_mode mode
= GET_MODE (op0
);
1299 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
1300 op1
= force_reg (mode
, op1
);
1302 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
1304 HOST_WIDE_INT addend
= 0;
1305 enum tls_model tls_kind
;
/* Peel a (const (plus sym const_int)) wrapper into sym + addend.  */
1308 if (GET_CODE (op1
) == CONST
1309 && GET_CODE (XEXP (op1
, 0)) == PLUS
1310 && GET_CODE (XEXP (XEXP (op1
, 0), 1)) == CONST_INT
)
1312 addend
= INTVAL (XEXP (XEXP (op1
, 0), 1));
1313 sym
= XEXP (XEXP (op1
, 0), 0);
1316 tls_kind
= tls_symbolic_operand_type (sym
);
1318 return ia64_expand_tls_address (tls_kind
, op0
, sym
, op1
, addend
);
1320 if (any_offset_symbol_operand (sym
, mode
))
1322 else if (aligned_offset_symbol_operand (sym
, mode
))
1324 HOST_WIDE_INT addend_lo
, addend_hi
;
/* Keep only the high part of the addend with the symbol; the
   sign-extended low 14 bits are re-added separately below.  */
1326 addend_lo
= ((addend
& 0x3fff) ^ 0x2000) - 0x2000;
1327 addend_hi
= addend
- addend_lo
;
1331 op1
= plus_constant (mode
, sym
, addend_hi
);
1340 if (reload_completed
)
1342 /* We really should have taken care of this offset earlier. */
1343 gcc_assert (addend
== 0);
1344 if (ia64_expand_load_address (op0
, op1
))
/* Without pseudos we must reuse OP0 as the intermediate target.  */
1350 rtx subtarget
= !can_create_pseudo_p () ? op0
: gen_reg_rtx (mode
);
1352 emit_insn (gen_rtx_SET (subtarget
, op1
));
1354 op1
= expand_simple_binop (mode
, PLUS
, subtarget
,
1355 GEN_INT (addend
), op0
, 1, OPTAB_DIRECT
);
1364 /* Split a move from OP1 to OP0 conditional on COND. */
/* Emits the move normally, then walks back over every insn it generated
   and wraps its pattern in a COND_EXEC predicated on a copy of COND.  */
1367 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
1369 rtx_insn
*insn
, *first
= get_last_insn ();
1371 emit_move_insn (op0
, op1
);
1373 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1375 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1379 /* Split a post-reload TImode or TFmode reference into two DImode
1380 components. This is made extra difficult by the fact that we do
1381 not get any scratch registers to work with, because reload cannot
1382 be prevented from giving us a scratch that overlaps the register
1383 pair involved. So instead, when addressing memory, we tweak the
1384 pointer register up and back down with POST_INCs. Or up and not
1385 back down when we can get away with it.
1387 REVERSED is true when the loads must be done in reversed order
1388 (high word first) for correctness. DEAD is true when the pointer
1389 dies with the second insn we generate and therefore the second
1390 address must not carry a postmodify.
1392 May return an insn which is to be emitted after the moves. */
1395 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
1399 switch (GET_CODE (in
))
/* Register pair: low word in regno, high word in regno + 1; REVERSED
   selects which OUT slot gets which half.  */
1402 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
1403 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1408 /* Cannot occur reversed. */
1409 gcc_assert (!reversed
);
1411 if (GET_MODE (in
) != TFmode
)
1412 split_double (in
, &out
[0], &out
[1]);
1414 /* split_double does not understand how to split a TFmode
1415 quantity into a pair of DImode constants. */
1417 unsigned HOST_WIDE_INT p
[2];
1418 long l
[4]; /* TFmode is 128 bits */
1420 real_to_target (l
, CONST_DOUBLE_REAL_VALUE (in
), TFmode
);
/* Reassemble the four 32-bit target words into two 64-bit halves,
   respecting the float word order.  */
1422 if (FLOAT_WORDS_BIG_ENDIAN
)
1424 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
1425 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
1429 p
[0] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
1430 p
[1] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
1432 out
[0] = GEN_INT (p
[0]);
1433 out
[1] = GEN_INT (p
[1]);
/* Memory: dispatch on the address form.  */
1439 rtx base
= XEXP (in
, 0);
1442 switch (GET_CODE (base
))
/* Plain register base: post-inc up for the first word, post-dec back
   down for the second (skip the post-dec when the pointer is dead).  */
1447 out
[0] = adjust_automodify_address
1448 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1449 out
[1] = adjust_automodify_address
1450 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
1454 /* Reversal requires a pre-increment, which can only
1455 be done as a separate insn. */
1456 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
1457 out
[0] = adjust_automodify_address
1458 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
1459 out
[1] = adjust_address (in
, DImode
, 0);
1464 gcc_assert (!reversed
&& !dead
);
1466 /* Just do the increment in two steps. */
1467 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
1468 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1472 gcc_assert (!reversed
&& !dead
);
1474 /* Add 8, subtract 24. */
1475 base
= XEXP (base
, 0);
1476 out
[0] = adjust_automodify_address
1477 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1478 out
[1] = adjust_automodify_address
1480 gen_rtx_POST_MODIFY (Pmode
, base
,
1481 plus_constant (Pmode
, base
, -24)),
1486 gcc_assert (!reversed
&& !dead
);
1488 /* Extract and adjust the modification. This case is
1489 trickier than the others, because we might have an
1490 index register, or we might have a combined offset that
1491 doesn't fit a signed 9-bit displacement field. We can
1492 assume the incoming expression is already legitimate. */
1493 offset
= XEXP (base
, 1);
1494 base
= XEXP (base
, 0);
1496 out
[0] = adjust_automodify_address
1497 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
1499 if (GET_CODE (XEXP (offset
, 1)) == REG
)
1501 /* Can't adjust the postmodify to match. Emit the
1502 original, then a separate addition insn. */
1503 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
1504 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
1508 gcc_assert (GET_CODE (XEXP (offset
, 1)) == CONST_INT
)
;
1509 if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
1511 /* Again the postmodify cannot be made to match,
1512 but in this case it's more efficient to get rid
1513 of the postmodify entirely and fix up with an
1515 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
1517 (base
, base
, GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
1521 /* Combined offset still fits in the displacement field.
1522 (We cannot overflow it at the high end.) */
1523 out
[1] = adjust_automodify_address
1524 (in
, DImode
, gen_rtx_POST_MODIFY
1525 (Pmode
, base
, gen_rtx_PLUS
1527 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
1546 /* Split a TImode or TFmode move instruction after reload.
1547 This is used by *movtf_internal and *movti_internal. */
1549 ia64_split_tmode_move (rtx operands
[])
1551 rtx in
[2], out
[2], insn
;
1554 bool reversed
= false;
1556 /* It is possible for reload to decide to overwrite a pointer with
1557 the value it points to. In that case we have to do the loads in
1558 the appropriate order so that the pointer is not destroyed too
1559 early. Also we must not generate a postmodify for that second
1560 load, or rws_access_regno will die. And we must not generate a
1561 postmodify for the second load if the destination register
1562 overlaps with the base register. */
1563 if (GET_CODE (operands
[1]) == MEM
1564 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
/* Dig the base register out of the (possibly autoinc) address.  */
1566 rtx base
= XEXP (operands
[1], 0);
1567 while (GET_CODE (base
) != REG
)
1568 base
= XEXP (base
, 0);
1570 if (REGNO (base
) == REGNO (operands
[0]))
1573 if (refers_to_regno_p (REGNO (operands
[0]),
1574 REGNO (operands
[0])+2,
1578 /* Another reason to do the moves in reversed order is if the first
1579 element of the target register pair is also the second element of
1580 the source register pair. */
1581 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1582 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
/* Split both sides into DImode halves; each may return a fixup insn to
   emit after its half of the move.  */
1585 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1586 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
/* Attach REG_INC notes for any autoincrement addresses we created.  */
1588 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1589 if (GET_CODE (EXP) == MEM \
1590 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1591 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1592 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1593 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1595 insn
= emit_insn (gen_rtx_SET (out
[0], in
[0]));
1596 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1597 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1599 insn
= emit_insn (gen_rtx_SET (out
[1], in
[1]));
1600 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1601 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1604 emit_insn (fixup
[0]);
1606 emit_insn (fixup
[1]);
1608 #undef MAYBE_ADD_REG_INC_NOTE
1611 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1612 through memory plus an extra GR scratch register. Except that you can
1613 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1614 SECONDARY_RELOAD_CLASS, but not both.
1616 We got into problems in the first place by allowing a construct like
1617 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1618 This solution attempts to prevent this situation from occurring. When
1619 we see something like the above, we spill the inner register to memory. */
/* Returns IN spilled to a stack MEM viewed in MODE when IN is a
   (subreg (reg:TI)), or when FORCE and IN is any REG; the fall-through
   for other operands is elided in this extract.  */
1622 spill_xfmode_rfmode_operand (rtx in
, int force
, machine_mode mode
)
1624 if (GET_CODE (in
) == SUBREG
1625 && GET_MODE (SUBREG_REG (in
)) == TImode
1626 && GET_CODE (SUBREG_REG (in
)) == REG
)
1628 rtx memt
= assign_stack_temp (TImode
, 16);
1629 emit_move_insn (memt
, SUBREG_REG (in
));
1630 return adjust_address (memt
, mode
, 0);
1632 else if (force
&& GET_CODE (in
) == REG
)
1634 rtx memx
= assign_stack_temp (mode
, 16);
1635 emit_move_insn (memx
, in
);
1642 /* Expand the movxf or movrf pattern (MODE says which) with the given
1643 OPERANDS, returning true if the pattern should then invoke
1647 ia64_expand_movxf_movrf (machine_mode mode
, rtx operands
[])
1649 rtx op0
= operands
[0];
1651 if (GET_CODE (op0
) == SUBREG
)
1652 op0
= SUBREG_REG (op0
);
1654 /* We must support XFmode loads into general registers for stdarg/vararg,
1655 unprototyped calls, and a rare case where a long double is passed as
1656 an argument after a float HFA fills the FP registers. We split them into
1657 DImode loads for convenience. We also need to support XFmode stores
1658 for the last case. This case does not happen for stdarg/vararg routines,
1659 because we do a block store to memory of unnamed arguments. */
/* Case 1: destination is a general register pair.  */
1661 if (GET_CODE (op0
) == REG
&& GR_REGNO_P (REGNO (op0
)))
1665 /* We're hoping to transform everything that deals with XFmode
1666 quantities and GR registers early in the compiler. */
1667 gcc_assert (can_create_pseudo_p ());
1669 /* Struct to register can just use TImode instead. */
1670 if ((GET_CODE (operands
[1]) == SUBREG
1671 && GET_MODE (SUBREG_REG (operands
[1])) == TImode
)
1672 || (GET_CODE (operands
[1]) == REG
1673 && GR_REGNO_P (REGNO (operands
[1]))))
1675 rtx op1
= operands
[1];
1677 if (GET_CODE (op1
) == SUBREG
)
1678 op1
= SUBREG_REG (op1
);
1680 op1
= gen_rtx_REG (TImode
, REGNO (op1
));
1682 emit_move_insn (gen_rtx_REG (TImode
, REGNO (op0
)), op1
);
1686 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
1688 /* Don't word-swap when reading in the constant. */
1689 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
)),
1690 operand_subword (operands
[1], WORDS_BIG_ENDIAN
,
1692 emit_move_insn (gen_rtx_REG (DImode
, REGNO (op0
) + 1),
1693 operand_subword (operands
[1], !WORDS_BIG_ENDIAN
,
1698 /* If the quantity is in a register not known to be GR, spill it. */
1699 if (register_operand (operands
[1], mode
))
1700 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 1, mode
);
1702 gcc_assert (GET_CODE (operands
[1]) == MEM
);
1704 /* Don't word-swap when reading in the value. */
1705 out
[0] = gen_rtx_REG (DImode
, REGNO (op0
));
1706 out
[1] = gen_rtx_REG (DImode
, REGNO (op0
) + 1);
1708 emit_move_insn (out
[0], adjust_address (operands
[1], DImode
, 0));
1709 emit_move_insn (out
[1], adjust_address (operands
[1], DImode
, 8));
/* Case 2: source is a general register pair.  */
1713 if (GET_CODE (operands
[1]) == REG
&& GR_REGNO_P (REGNO (operands
[1])))
1715 /* We're hoping to transform everything that deals with XFmode
1716 quantities and GR registers early in the compiler. */
1717 gcc_assert (can_create_pseudo_p ());
1719 /* Op0 can't be a GR_REG here, as that case is handled above.
1720 If op0 is a register, then we spill op1, so that we now have a
1721 MEM operand. This requires creating an XFmode subreg of a TImode reg
1722 to force the spill. */
1723 if (register_operand (operands
[0], mode
))
1725 rtx op1
= gen_rtx_REG (TImode
, REGNO (operands
[1]));
1726 op1
= gen_rtx_SUBREG (mode
, op1
, 0);
1727 operands
[1] = spill_xfmode_rfmode_operand (op1
, 0, mode
);
1734 gcc_assert (GET_CODE (operands
[0]) == MEM
);
1736 /* Don't word-swap when writing out the value. */
1737 in
[0] = gen_rtx_REG (DImode
, REGNO (operands
[1]));
1738 in
[1] = gen_rtx_REG (DImode
, REGNO (operands
[1]) + 1);
1740 emit_move_insn (adjust_address (operands
[0], DImode
, 0), in
[0]);
1741 emit_move_insn (adjust_address (operands
[0], DImode
, 8), in
[1]);
/* General case: spill dubious sources, route TImode destinations
   through memory, and force sources into registers when needed.  */
1746 if (!reload_in_progress
&& !reload_completed
)
1748 operands
[1] = spill_xfmode_rfmode_operand (operands
[1], 0, mode
);
1750 if (GET_MODE (op0
) == TImode
&& GET_CODE (op0
) == REG
)
1752 rtx memt
, memx
, in
= operands
[1];
1753 if (CONSTANT_P (in
))
1754 in
= validize_mem (force_const_mem (mode
, in
));
1755 if (GET_CODE (in
) == MEM
)
1756 memt
= adjust_address (in
, TImode
, 0);
1759 memt
= assign_stack_temp (TImode
, 16);
1760 memx
= adjust_address (memt
, mode
, 0);
1761 emit_move_insn (memx
, in
);
1763 emit_move_insn (op0
, memt
);
1767 if (!ia64_move_ok (operands
[0], operands
[1]))
1768 operands
[1] = force_reg (mode
, operands
[1]);
1774 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1775 with the expression that holds the compare result (in VOIDmode). */
/* Cached libfunc rtx for the HPUX _U_Qfcmp TFmode compare (GC-rooted);
   initialization is outside this extract.  */
1777 static GTY(()) rtx cmptf_libfunc
;
1780 ia64_expand_compare (rtx
*expr
, rtx
*op0
, rtx
*op1
)
1782 enum rtx_code code
= GET_CODE (*expr
);
1785 /* If we have a BImode input, then we already have a compare result, and
1786 do not need to emit another comparison. */
1787 if (GET_MODE (*op0
) == BImode
)
1789 gcc_assert ((code
== NE
|| code
== EQ
) && *op1
== const0_rtx
);
1792 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1793 magic number as its third argument, that indicates what to do.
1794 The return value is an integer to be compared against zero. */
1795 else if (TARGET_HPUX
&& GET_MODE (*op0
) == TFmode
)
1798 QCMP_INV
= 1, /* Raise FP_INVALID on NaNs as a side effect. */
1805 enum rtx_code ncode
;
1808 gcc_assert (cmptf_libfunc
&& GET_MODE (*op1
) == TFmode
);
/* MAGIC selects the libfunc's operation; NCODE is how the integer
   result is compared against zero afterwards.  */
1811 /* 1 = equal, 0 = not equal. Equality operators do
1812 not raise FP_INVALID when given a NaN operand. */
1813 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1814 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1815 /* isunordered() from C99. */
1816 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1817 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1818 /* Relational operators raise FP_INVALID when given
1820 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1821 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1822 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1823 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1824 /* Unordered relational operators do not raise FP_INVALID
1825 when given a NaN operand. */
1826 case UNLT
: magic
= QCMP_LT
|QCMP_UNORD
; ncode
= NE
; break;
1827 case UNLE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_UNORD
; ncode
= NE
; break;
1828 case UNGT
: magic
= QCMP_GT
|QCMP_UNORD
; ncode
= NE
; break;
1829 case UNGE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_UNORD
; ncode
= NE
; break;
1830 /* Not supported. */
1833 default: gcc_unreachable ();
1838 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1839 *op0
, TFmode
, *op1
, TFmode
,
1840 GEN_INT (magic
), DImode
);
1841 cmp
= gen_reg_rtx (BImode
);
1842 emit_insn (gen_rtx_SET (cmp
, gen_rtx_fmt_ee (ncode
, BImode
,
1845 rtx_insn
*insns
= get_insns ();
/* Attach an equivalent so CSE can treat the libcall as the compare.  */
1848 emit_libcall_block (insns
, cmp
, cmp
,
1849 gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
));
1854 cmp
= gen_reg_rtx (BImode
);
1855 emit_insn (gen_rtx_SET (cmp
, gen_rtx_fmt_ee (code
, BImode
, *op0
, *op1
)));
/* Hand back the BImode predicate compared against zero.  */
1859 *expr
= gen_rtx_fmt_ee (code
, VOIDmode
, cmp
, const0_rtx
);
1864 /* Generate an integral vector comparison. Return true if the condition has
1865 been reversed, and so the sense of the comparison should be inverted. */
1868 ia64_expand_vecint_compare (enum rtx_code code
, machine_mode mode
,
1869 rtx dest
, rtx op0
, rtx op1
)
1871 bool negate
= false;
1874 /* Canonicalize the comparison to EQ, GT, GTU. */
/* NOTE(review): the case labels for these canonicalizations are elided
   in this extract — verify the mapping against upstream ia64.c.  */
1885 code
= reverse_condition (code
);
1891 code
= reverse_condition (code
);
/* Swapping the comparison also swaps the operands.  */
1897 code
= swap_condition (code
);
1898 x
= op0
, op0
= op1
, op1
= x
;
1905 /* Unsigned parallel compare is not supported by the hardware. Play some
1906 tricks to turn this into a signed comparison against 0. */
1915 /* Subtract (-(INT MAX) - 1) from both operands to make
1917 mask
= gen_int_mode (0x80000000, SImode
);
1918 mask
= gen_rtx_CONST_VECTOR (V2SImode
, gen_rtvec (2, mask
, mask
));
1919 mask
= force_reg (mode
, mask
);
1920 t1
= gen_reg_rtx (mode
);
1921 emit_insn (gen_subv2si3 (t1
, op0
, mask
));
1922 t2
= gen_reg_rtx (mode
);
1923 emit_insn (gen_subv2si3 (t2
, op1
, mask
));
1932 /* Perform a parallel unsigned saturating subtraction. */
1933 x
= gen_reg_rtx (mode
);
1934 emit_insn (gen_rtx_SET (x
, gen_rtx_US_MINUS (mode
, op0
, op1
)));
1938 op1
= CONST0_RTX (mode
);
/* Emit the final (possibly rewritten) comparison into DEST.  */
1947 x
= gen_rtx_fmt_ee (code
, mode
, op0
, op1
);
1948 emit_insn (gen_rtx_SET (dest
, x
));
1953 /* Emit an integral vector conditional move. */
/* operands[0] = (operands[3] cmp) ? operands[1] : operands[2], with the
   comparison operands in operands[4]/[5].  When the compare had to be
   negated, the true/false arms are swapped via the 1+negate / 2-negate
   indexing.  Implemented with AND/ANDN/IOR masking.  */
1956 ia64_expand_vecint_cmov (rtx operands
[])
1958 machine_mode mode
= GET_MODE (operands
[0]);
1959 enum rtx_code code
= GET_CODE (operands
[3]);
1963 cmp
= gen_reg_rtx (mode
);
1964 negate
= ia64_expand_vecint_compare (code
, mode
, cmp
,
1965 operands
[4], operands
[5]);
1967 ot
= operands
[1+negate
];
1968 of
= operands
[2-negate
];
/* Special-case zero arms: a single AND or ANDN suffices.  */
1970 if (ot
== CONST0_RTX (mode
))
1972 if (of
== CONST0_RTX (mode
))
1974 emit_move_insn (operands
[0], ot
);
1978 x
= gen_rtx_NOT (mode
, cmp
);
1979 x
= gen_rtx_AND (mode
, x
, of
);
1980 emit_insn (gen_rtx_SET (operands
[0], x
));
1982 else if (of
== CONST0_RTX (mode
))
1984 x
= gen_rtx_AND (mode
, cmp
, ot
);
1985 emit_insn (gen_rtx_SET (operands
[0], x
));
/* General case: (cmp & true_arm) | (~cmp & false_arm).  */
1991 t
= gen_reg_rtx (mode
);
1992 x
= gen_rtx_AND (mode
, cmp
, operands
[1+negate
]);
1993 emit_insn (gen_rtx_SET (t
, x
));
1995 f
= gen_reg_rtx (mode
);
1996 x
= gen_rtx_NOT (mode
, cmp
);
1997 x
= gen_rtx_AND (mode
, x
, operands
[2-negate
]);
1998 emit_insn (gen_rtx_SET (f
, x
));
2000 x
= gen_rtx_IOR (mode
, t
, f
);
2001 emit_insn (gen_rtx_SET (operands
[0], x
));
2005 /* Emit an integral vector min or max operation. Return true if all done. */
2008 ia64_expand_vecint_minmax (enum rtx_code code
, machine_mode mode
,
2013 /* These four combinations are supported directly. */
2014 if (mode
== V8QImode
&& (code
== UMIN
|| code
== UMAX
))
2016 if (mode
== V4HImode
&& (code
== SMIN
|| code
== SMAX
))
2019 /* This combination can be implemented with only saturating subtraction. */
2020 if (mode
== V4HImode
&& code
== UMAX
)
2022 rtx x
, tmp
= gen_reg_rtx (mode
);
/* umax(a,b) = sat_sub(a,b) + b.  */
2024 x
= gen_rtx_US_MINUS (mode
, operands
[1], operands
[2]);
2025 emit_insn (gen_rtx_SET (tmp
, x
));
2027 emit_insn (gen_addv4hi3 (operands
[0], tmp
, operands
[2]));
2031 /* Everything else implemented via vector comparisons. */
2032 xops
[0] = operands
[0];
2033 xops
[4] = xops
[1] = operands
[1];
2034 xops
[5] = xops
[2] = operands
[2];
/* xops[3] carries the comparison; the cmov selects min or max.  */
2053 xops
[3] = gen_rtx_fmt_ee (code
, VOIDmode
, operands
[1], operands
[2]);
2055 ia64_expand_vecint_cmov (xops
);
2059 /* The vectors LO and HI each contain N halves of a double-wide vector.
2060 Reassemble either the first N/2 or the second N/2 elements. */
/* Builds an interleaving permutation (elements alternate between the two
   inputs) and expands it via ia64_expand_vec_perm_const_1.  On big-endian
   the operand roles are swapped.  */
2063 ia64_unpack_assemble (rtx out
, rtx lo
, rtx hi
, bool highp
)
2065 machine_mode vmode
= GET_MODE (lo
);
2066 unsigned int i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
2067 struct expand_vec_perm_d d
;
2070 d
.target
= gen_lowpart (vmode
, out
);
2071 d
.op0
= (TARGET_BIG_ENDIAN
? hi
: lo
);
2072 d
.op1
= (TARGET_BIG_ENDIAN
? lo
: hi
);
2075 d
.one_operand_p
= false;
2076 d
.testing_p
= false;
/* Select either the low or the high half of each input.  */
2078 high
= (highp
? nelt
/ 2 : 0);
2079 for (i
= 0; i
< nelt
/ 2; ++i
)
2081 d
.perm
[i
* 2] = i
+ high
;
2082 d
.perm
[i
* 2 + 1] = i
+ high
+ nelt
;
2085 ok
= ia64_expand_vec_perm_const_1 (&d
);
2089 /* Return a vector of the sign-extension of VEC. */
/* For the signed case, computes a per-element mask of (vec < 0) — each
   lane all-ones where negative; the unsigned early-return path is elided
   in this extract.  */
2092 ia64_unpack_sign (rtx vec
, bool unsignedp
)
2094 machine_mode mode
= GET_MODE (vec
);
2095 rtx zero
= CONST0_RTX (mode
);
2101 rtx sign
= gen_reg_rtx (mode
);
2104 neg
= ia64_expand_vecint_compare (LT
, mode
, sign
, vec
, zero
);
2111 /* Emit an integral vector unpack operation. */
/* Widen operands[1] into operands[0] by interleaving it with its
   sign/zero vector; HIGHP selects which half is produced.  */
2114 ia64_expand_unpack (rtx operands
[3], bool unsignedp
, bool highp
)
2116 rtx sign
= ia64_unpack_sign (operands
[1], unsignedp
);
2117 ia64_unpack_assemble (operands
[0], operands
[1], sign
, highp
);
2120 /* Emit an integral vector widening sum operations. */
/* operands[0] = widen(operands[1]) summed with operands[2]: unpack the
   low and high halves to the wide mode, then add both into the
   accumulator.  */
2123 ia64_expand_widen_sum (rtx operands
[3], bool unsignedp
)
2128 sign
= ia64_unpack_sign (operands
[1], unsignedp
);
2130 wmode
= GET_MODE (operands
[0]);
2131 l
= gen_reg_rtx (wmode
);
2132 h
= gen_reg_rtx (wmode
);
2134 ia64_unpack_assemble (l
, operands
[1], sign
, false);
2135 ia64_unpack_assemble (h
, operands
[1], sign
, true);
2137 t
= expand_binop (wmode
, add_optab
, l
, operands
[2], NULL
, 0, OPTAB_DIRECT
);
2138 t
= expand_binop (wmode
, add_optab
, h
, t
, operands
[0], 0, OPTAB_DIRECT
);
2139 if (t
!= operands
[0])
2140 emit_move_insn (operands
[0], t
);
2143 /* Emit the appropriate sequence for a call. */
/* RETVAL is the value destination or NULL; ADDR the callee.  Chooses
   between the _nogp patterns (no GP save/restore, for NO_PIC/AUTO_PIC)
   and the _gp patterns, and records register uses on the call insn.
   NOTE(review): the sibcall_p parameter/branches are partly elided in
   this extract.  */
2146 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
2151 addr
= XEXP (addr
, 0);
2152 addr
= convert_memory_address (DImode
, addr
);
2153 b0
= gen_rtx_REG (DImode
, R_BR (0));
2155 /* ??? Should do this for functions known to bind local too. */
2156 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2159 insn
= gen_sibcall_nogp (addr
);
2161 insn
= gen_call_nogp (addr
, b0
);
2163 insn
= gen_call_value_nogp (retval
, addr
, b0
);
2164 insn
= emit_call_insn (insn
);
2169 insn
= gen_sibcall_gp (addr
);
2171 insn
= gen_call_gp (addr
, b0
);
2173 insn
= gen_call_value_gp (retval
, addr
, b0
);
2174 insn
= emit_call_insn (insn
);
/* GP-using calls read the PIC register.  */
2176 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
2180 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
/* OpenVMS passes extra information in r25.  */
2182 if (TARGET_ABI_OPEN_VMS
)
2183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
2184 gen_rtx_REG (DImode
, GR_REG (25)));
/* Record that frame register R has been used in emitted code; once set,
   the assignment in current_frame_info.r[R] must never change (asserted
   below).  */
2188 reg_emitted (enum ia64_frame_regs r
)
2190 if (emitted_frame_related_regs
[r
] == 0)
2191 emitted_frame_related_regs
[r
] = current_frame_info
.r
[r
];
2193 gcc_assert (emitted_frame_related_regs
[r
] == current_frame_info
.r
[r
]);
/* Return the hard register number assigned to frame register R,
   marking it as emitted (reg_emitted call is elided in this extract —
   verify upstream).  */
2197 get_reg (enum ia64_frame_regs r
)
2200 return current_frame_info
.r
[r
];
/* Return whether hard register REGNO has been recorded as an emitted
   frame-related register.  */
2204 is_emitted (int regno
)
2208 for (r
= reg_fp
; r
< number_of_ia64_frame_regs
; r
++)
2209 if (emitted_frame_related_regs
[r
] == regno
)
/* Reload the GP (pic_offset_table_rtx) after a call that may have
   clobbered it: either copy it back from its save register, or recompute
   the spill-slot address off the frame/stack pointer and load it.  */
2215 ia64_reload_gp (void)
2219 if (current_frame_info
.r
[reg_save_gp
])
2221 tmp
= gen_rtx_REG (DImode
, get_reg (reg_save_gp
))
;
2225 HOST_WIDE_INT offset
;
/* Address of the GP spill slot relative to the CFA.  */
2228 offset
= (current_frame_info
.spill_cfa_off
2229 + current_frame_info
.spill_size
);
2230 if (frame_pointer_needed
)
2232 tmp
= hard_frame_pointer_rtx
;
2237 tmp
= stack_pointer_rtx
;
2238 offset
= current_frame_info
.total_size
- offset
;
/* Compute the slot address into the GP register itself, using a single
   add when the offset fits constraint I, else a move + add.  */
2241 offset_r
= GEN_INT (offset
);
2242 if (satisfies_constraint_I (offset_r
))
2243 emit_insn (gen_adddi3 (pic_offset_table_rtx
, tmp
, offset_r
));
2246 emit_move_insn (pic_offset_table_rtx
, offset_r
);
2247 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
2248 pic_offset_table_rtx
, tmp
));
2251 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
2254 emit_move_insn (pic_offset_table_rtx
, tmp
);
/* Split a call for output: when ADDR is a general register it is a
   function-descriptor pointer, so load the code address into SCRATCH_B
   (via SCRATCH_R) and the callee's GP from the descriptor; then emit the
   appropriate nogp call pattern and, if needed, reload our own GP.  */
2258 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
2259 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
2262 bool is_desc
= false;
2264 /* If we find we're calling through a register, then we're actually
2265 calling through a descriptor, so load up the values. */
2266 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
2271 /* ??? We are currently constrained to *not* use peep2, because
2272 we can legitimately change the global lifetime of the GP
2273 (in the form of killing where previously live). This is
2274 because a call through a descriptor doesn't use the previous
2275 value of the GP, while a direct call does, and we do not
2276 commit to either form until the split here.
2278 That said, this means that we lack precise life info for
2279 whether ADDR is dead after this call. This is not terribly
2280 important, since we can fix things up essentially for free
2281 with the POST_DEC below, but it's nice to not use it when we
2282 can immediately tell it's not necessary. */
2283 addr_dead_p
= ((noreturn_p
|| sibcall_p
2284 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
2286 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
2288 /* Load the code address into scratch_b. */
2289 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
2290 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2291 emit_move_insn (scratch_r
, tmp
);
2292 emit_move_insn (scratch_b
, scratch_r
);
2294 /* Load the GP address. If ADDR is not dead here, then we must
2295 revert the change made above via the POST_INCREMENT. */
2297 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
2300 tmp
= gen_rtx_MEM (Pmode
, tmp
);
2301 emit_move_insn (pic_offset_table_rtx
, tmp
);
2308 insn
= gen_sibcall_nogp (addr
);
2310 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
2312 insn
= gen_call_nogp (addr
, retaddr
);
2313 emit_call_insn (insn
);
/* Restore our GP after a non-const-GP or descriptor call that returns.  */
2315 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
2319 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2321 This differs from the generic code in that we know about the zero-extending
2322 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2323 also know that ld.acq+cmpxchg.rel equals a full barrier.
2325 The loop we want to generate looks like
2330 new_reg = cmp_reg op val;
2331 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2332 if (cmp_reg != old_reg)
2335 Note that we only do the plain load from memory once. Subsequent
2336 iterations use the value loaded by the compare-and-swap pattern. */
2339 ia64_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
2340 rtx old_dst
, rtx new_dst
, enum memmodel model
)
2342 machine_mode mode
= GET_MODE (mem
);
2343 rtx old_reg
, new_reg
, cmp_reg
, ar_ccv
, label
;
2344 enum insn_code icode
;
2346 /* Special case for using fetchadd. */
2347 if ((mode
== SImode
|| mode
== DImode
)
2348 && (code
== PLUS
|| code
== MINUS
)
2349 && fetchadd_operand (val
, mode
))
2352 val
= GEN_INT (-INTVAL (val
));
2355 old_dst
= gen_reg_rtx (mode
);
2359 case MEMMODEL_ACQ_REL
:
2360 case MEMMODEL_SEQ_CST
:
2361 case MEMMODEL_SYNC_SEQ_CST
:
2362 emit_insn (gen_memory_barrier ());
2364 case MEMMODEL_RELAXED
:
2365 case MEMMODEL_ACQUIRE
:
2366 case MEMMODEL_SYNC_ACQUIRE
:
2367 case MEMMODEL_CONSUME
:
2369 icode
= CODE_FOR_fetchadd_acq_si
;
2371 icode
= CODE_FOR_fetchadd_acq_di
;
2373 case MEMMODEL_RELEASE
:
2374 case MEMMODEL_SYNC_RELEASE
:
2376 icode
= CODE_FOR_fetchadd_rel_si
;
2378 icode
= CODE_FOR_fetchadd_rel_di
;
2385 emit_insn (GEN_FCN (icode
) (old_dst
, mem
, val
));
2389 new_reg
= expand_simple_binop (mode
, PLUS
, old_dst
, val
, new_dst
,
2391 if (new_reg
!= new_dst
)
2392 emit_move_insn (new_dst
, new_reg
);
2397 /* Because of the volatile mem read, we get an ld.acq, which is the
2398 front half of the full barrier. The end half is the cmpxchg.rel.
2399 For relaxed and release memory models, we don't need this. But we
2400 also don't bother trying to prevent it either. */
2401 gcc_assert (is_mm_relaxed (model
) || is_mm_release (model
)
2402 || MEM_VOLATILE_P (mem
));
2404 old_reg
= gen_reg_rtx (DImode
);
2405 cmp_reg
= gen_reg_rtx (DImode
);
2406 label
= gen_label_rtx ();
2410 val
= simplify_gen_subreg (DImode
, val
, mode
, 0);
2411 emit_insn (gen_extend_insn (cmp_reg
, mem
, DImode
, mode
, 1));
2414 emit_move_insn (cmp_reg
, mem
);
2418 ar_ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
2419 emit_move_insn (old_reg
, cmp_reg
);
2420 emit_move_insn (ar_ccv
, cmp_reg
);
2423 emit_move_insn (old_dst
, gen_lowpart (mode
, cmp_reg
));
2428 new_reg
= expand_simple_binop (DImode
, AND
, new_reg
, val
, NULL_RTX
,
2429 true, OPTAB_DIRECT
);
2430 new_reg
= expand_simple_unop (DImode
, code
, new_reg
, NULL_RTX
, true);
2433 new_reg
= expand_simple_binop (DImode
, code
, new_reg
, val
, NULL_RTX
,
2434 true, OPTAB_DIRECT
);
2437 new_reg
= gen_lowpart (mode
, new_reg
);
2439 emit_move_insn (new_dst
, new_reg
);
2443 case MEMMODEL_RELAXED
:
2444 case MEMMODEL_ACQUIRE
:
2445 case MEMMODEL_SYNC_ACQUIRE
:
2446 case MEMMODEL_CONSUME
:
2449 case QImode
: icode
= CODE_FOR_cmpxchg_acq_qi
; break;
2450 case HImode
: icode
= CODE_FOR_cmpxchg_acq_hi
; break;
2451 case SImode
: icode
= CODE_FOR_cmpxchg_acq_si
; break;
2452 case DImode
: icode
= CODE_FOR_cmpxchg_acq_di
; break;
2458 case MEMMODEL_RELEASE
:
2459 case MEMMODEL_SYNC_RELEASE
:
2460 case MEMMODEL_ACQ_REL
:
2461 case MEMMODEL_SEQ_CST
:
2462 case MEMMODEL_SYNC_SEQ_CST
:
2465 case QImode
: icode
= CODE_FOR_cmpxchg_rel_qi
; break;
2466 case HImode
: icode
= CODE_FOR_cmpxchg_rel_hi
; break;
2467 case SImode
: icode
= CODE_FOR_cmpxchg_rel_si
; break;
2468 case DImode
: icode
= CODE_FOR_cmpxchg_rel_di
; break;
2478 emit_insn (GEN_FCN (icode
) (cmp_reg
, mem
, ar_ccv
, new_reg
));
2480 emit_cmp_and_jump_insns (cmp_reg
, old_reg
, NE
, NULL
, DImode
, true, label
);
2483 /* Begin the assembly file. */
2486 ia64_file_start (void)
2488 default_file_start ();
2489 emit_safe_across_calls ();
2493 emit_safe_across_calls (void)
2495 unsigned int rs
, re
;
2502 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
2506 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
2510 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
2514 fputc (',', asm_out_file
);
2516 fprintf (asm_out_file
, "p%u", rs
);
2518 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
2522 fputc ('\n', asm_out_file
);
2525 /* Globalize a declaration. */
2528 ia64_globalize_decl_name (FILE * stream
, tree decl
)
2530 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
2531 tree version_attr
= lookup_attribute ("version_id", DECL_ATTRIBUTES (decl
));
2534 tree v
= TREE_VALUE (TREE_VALUE (version_attr
));
2535 const char *p
= TREE_STRING_POINTER (v
);
2536 fprintf (stream
, "\t.alias %s#, \"%s{%s}\"\n", name
, name
, p
);
2538 targetm
.asm_out
.globalize_label (stream
, name
);
2539 if (TREE_CODE (decl
) == FUNCTION_DECL
)
2540 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
2543 /* Helper function for ia64_compute_frame_size: find an appropriate general
2544 register to spill some special register to. SPECIAL_SPILL_MASK contains
2545 bits in GR0 to GR31 that have already been allocated by this routine.
2546 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2549 find_gr_spill (enum ia64_frame_regs r
, int try_locals
)
2553 if (emitted_frame_related_regs
[r
] != 0)
2555 regno
= emitted_frame_related_regs
[r
];
2556 if (regno
>= LOC_REG (0) && regno
< LOC_REG (80 - frame_pointer_needed
)
2557 && current_frame_info
.n_local_regs
< regno
- LOC_REG (0) + 1)
2558 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2559 else if (crtl
->is_leaf
2560 && regno
>= GR_REG (1) && regno
<= GR_REG (31))
2561 current_frame_info
.gr_used_mask
|= 1 << regno
;
2566 /* If this is a leaf function, first try an otherwise unused
2567 call-clobbered register. */
2570 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2571 if (! df_regs_ever_live_p (regno
)
2572 && call_used_regs
[regno
]
2573 && ! fixed_regs
[regno
]
2574 && ! global_regs
[regno
]
2575 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0
2576 && ! is_emitted (regno
))
2578 current_frame_info
.gr_used_mask
|= 1 << regno
;
2585 regno
= current_frame_info
.n_local_regs
;
2586 /* If there is a frame pointer, then we can't use loc79, because
2587 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2588 reg_name switching code in ia64_expand_prologue. */
2589 while (regno
< (80 - frame_pointer_needed
))
2590 if (! is_emitted (LOC_REG (regno
++)))
2592 current_frame_info
.n_local_regs
= regno
;
2593 return LOC_REG (regno
- 1);
2597 /* Failed to find a general register to spill to. Must use stack. */
2601 /* In order to make for nice schedules, we try to allocate every temporary
2602 to a different register. We must of course stay away from call-saved,
2603 fixed, and global registers. We must also stay away from registers
2604 allocated in current_frame_info.gr_used_mask, since those include regs
2605 used all through the prologue.
2607 Any register allocated here must be used immediately. The idea is to
2608 aid scheduling, not to solve data flow problems. */
2610 static int last_scratch_gr_reg
;
2613 next_scratch_gr_reg (void)
2617 for (i
= 0; i
< 32; ++i
)
2619 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
2620 if (call_used_regs
[regno
]
2621 && ! fixed_regs
[regno
]
2622 && ! global_regs
[regno
]
2623 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
2625 last_scratch_gr_reg
= regno
;
2630 /* There must be _something_ available. */
2634 /* Helper function for ia64_compute_frame_size, called through
2635 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2638 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
2640 unsigned int regno
= REGNO (reg
);
2643 unsigned int i
, n
= hard_regno_nregs
[regno
][GET_MODE (reg
)];
2644 for (i
= 0; i
< n
; ++i
)
2645 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
2650 /* Returns the number of bytes offset between the frame pointer and the stack
2651 pointer for the current function. SIZE is the number of bytes of space
2652 needed for local variables. */
2655 ia64_compute_frame_size (HOST_WIDE_INT size
)
2657 HOST_WIDE_INT total_size
;
2658 HOST_WIDE_INT spill_size
= 0;
2659 HOST_WIDE_INT extra_spill_size
= 0;
2660 HOST_WIDE_INT pretend_args_size
;
2663 int spilled_gr_p
= 0;
2664 int spilled_fr_p
= 0;
2670 if (current_frame_info
.initialized
)
2673 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
2674 CLEAR_HARD_REG_SET (mask
);
2676 /* Don't allocate scratches to the return register. */
2677 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
2679 /* Don't allocate scratches to the EH scratch registers. */
2680 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2681 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
2682 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2683 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
2685 /* Static stack checking uses r2 and r3. */
2686 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
2687 current_frame_info
.gr_used_mask
|= 0xc;
2689 /* Find the size of the register stack frame. We have only 80 local
2690 registers, because we reserve 8 for the inputs and 8 for the
2693 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2694 since we'll be adjusting that down later. */
2695 regno
= LOC_REG (78) + ! frame_pointer_needed
;
2696 for (; regno
>= LOC_REG (0); regno
--)
2697 if (df_regs_ever_live_p (regno
) && !is_emitted (regno
))
2699 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
2701 /* For functions marked with the syscall_linkage attribute, we must mark
2702 all eight input registers as in use, so that locals aren't visible to
2705 if (cfun
->machine
->n_varargs
> 0
2706 || lookup_attribute ("syscall_linkage",
2707 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
2708 current_frame_info
.n_input_regs
= 8;
2711 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
2712 if (df_regs_ever_live_p (regno
))
2714 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
2717 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
2718 if (df_regs_ever_live_p (regno
))
2720 i
= regno
- OUT_REG (0) + 1;
2722 #ifndef PROFILE_HOOK
2723 /* When -p profiling, we need one output register for the mcount argument.
2724 Likewise for -a profiling for the bb_init_func argument. For -ax
2725 profiling, we need two output registers for the two bb_init_trace_func
2730 current_frame_info
.n_output_regs
= i
;
2732 /* ??? No rotating register support yet. */
2733 current_frame_info
.n_rotate_regs
= 0;
2735 /* Discover which registers need spilling, and how much room that
2736 will take. Begin with floating point and general registers,
2737 which will always wind up on the stack. */
2739 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
2740 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2742 SET_HARD_REG_BIT (mask
, regno
);
2748 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
2749 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2751 SET_HARD_REG_BIT (mask
, regno
);
2757 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
2758 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2760 SET_HARD_REG_BIT (mask
, regno
);
2765 /* Now come all special registers that might get saved in other
2766 general registers. */
2768 if (frame_pointer_needed
)
2770 current_frame_info
.r
[reg_fp
] = find_gr_spill (reg_fp
, 1);
2771 /* If we did not get a register, then we take LOC79. This is guaranteed
2772 to be free, even if regs_ever_live is already set, because this is
2773 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2774 as we don't count loc79 above. */
2775 if (current_frame_info
.r
[reg_fp
] == 0)
2777 current_frame_info
.r
[reg_fp
] = LOC_REG (79);
2778 current_frame_info
.n_local_regs
= LOC_REG (79) - LOC_REG (0) + 1;
2782 if (! crtl
->is_leaf
)
2784 /* Emit a save of BR0 if we call other functions. Do this even
2785 if this function doesn't return, as EH depends on this to be
2786 able to unwind the stack. */
2787 SET_HARD_REG_BIT (mask
, BR_REG (0));
2789 current_frame_info
.r
[reg_save_b0
] = find_gr_spill (reg_save_b0
, 1);
2790 if (current_frame_info
.r
[reg_save_b0
] == 0)
2792 extra_spill_size
+= 8;
2796 /* Similarly for ar.pfs. */
2797 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2798 current_frame_info
.r
[reg_save_ar_pfs
] = find_gr_spill (reg_save_ar_pfs
, 1);
2799 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2801 extra_spill_size
+= 8;
2805 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2806 registers are clobbered, so we fall back to the stack. */
2807 current_frame_info
.r
[reg_save_gp
]
2808 = (cfun
->calls_setjmp
? 0 : find_gr_spill (reg_save_gp
, 1));
2809 if (current_frame_info
.r
[reg_save_gp
] == 0)
2811 SET_HARD_REG_BIT (mask
, GR_REG (1));
2818 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs
[BR_REG (0)])
2820 SET_HARD_REG_BIT (mask
, BR_REG (0));
2821 extra_spill_size
+= 8;
2825 if (df_regs_ever_live_p (AR_PFS_REGNUM
))
2827 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
2828 current_frame_info
.r
[reg_save_ar_pfs
]
2829 = find_gr_spill (reg_save_ar_pfs
, 1);
2830 if (current_frame_info
.r
[reg_save_ar_pfs
] == 0)
2832 extra_spill_size
+= 8;
2838 /* Unwind descriptor hackery: things are most efficient if we allocate
2839 consecutive GR save registers for RP, PFS, FP in that order. However,
2840 it is absolutely critical that FP get the only hard register that's
2841 guaranteed to be free, so we allocated it first. If all three did
2842 happen to be allocated hard regs, and are consecutive, rearrange them
2843 into the preferred order now.
2845 If we have already emitted code for any of those registers,
2846 then it's already too late to change. */
2847 min_regno
= MIN (current_frame_info
.r
[reg_fp
],
2848 MIN (current_frame_info
.r
[reg_save_b0
],
2849 current_frame_info
.r
[reg_save_ar_pfs
]));
2850 max_regno
= MAX (current_frame_info
.r
[reg_fp
],
2851 MAX (current_frame_info
.r
[reg_save_b0
],
2852 current_frame_info
.r
[reg_save_ar_pfs
]));
2854 && min_regno
+ 2 == max_regno
2855 && (current_frame_info
.r
[reg_fp
] == min_regno
+ 1
2856 || current_frame_info
.r
[reg_save_b0
] == min_regno
+ 1
2857 || current_frame_info
.r
[reg_save_ar_pfs
] == min_regno
+ 1)
2858 && (emitted_frame_related_regs
[reg_save_b0
] == 0
2859 || emitted_frame_related_regs
[reg_save_b0
] == min_regno
)
2860 && (emitted_frame_related_regs
[reg_save_ar_pfs
] == 0
2861 || emitted_frame_related_regs
[reg_save_ar_pfs
] == min_regno
+ 1)
2862 && (emitted_frame_related_regs
[reg_fp
] == 0
2863 || emitted_frame_related_regs
[reg_fp
] == min_regno
+ 2))
2865 current_frame_info
.r
[reg_save_b0
] = min_regno
;
2866 current_frame_info
.r
[reg_save_ar_pfs
] = min_regno
+ 1;
2867 current_frame_info
.r
[reg_fp
] = min_regno
+ 2;
2870 /* See if we need to store the predicate register block. */
2871 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2872 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2874 if (regno
<= PR_REG (63))
2876 SET_HARD_REG_BIT (mask
, PR_REG (0));
2877 current_frame_info
.r
[reg_save_pr
] = find_gr_spill (reg_save_pr
, 1);
2878 if (current_frame_info
.r
[reg_save_pr
] == 0)
2880 extra_spill_size
+= 8;
2884 /* ??? Mark them all as used so that register renaming and such
2885 are free to use them. */
2886 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
2887 df_set_regs_ever_live (regno
, true);
2890 /* If we're forced to use st8.spill, we're forced to save and restore
2891 ar.unat as well. The check for existing liveness allows inline asm
2892 to touch ar.unat. */
2893 if (spilled_gr_p
|| cfun
->machine
->n_varargs
2894 || df_regs_ever_live_p (AR_UNAT_REGNUM
))
2896 df_set_regs_ever_live (AR_UNAT_REGNUM
, true);
2897 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
2898 current_frame_info
.r
[reg_save_ar_unat
]
2899 = find_gr_spill (reg_save_ar_unat
, spill_size
== 0);
2900 if (current_frame_info
.r
[reg_save_ar_unat
] == 0)
2902 extra_spill_size
+= 8;
2907 if (df_regs_ever_live_p (AR_LC_REGNUM
))
2909 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
2910 current_frame_info
.r
[reg_save_ar_lc
]
2911 = find_gr_spill (reg_save_ar_lc
, spill_size
== 0);
2912 if (current_frame_info
.r
[reg_save_ar_lc
] == 0)
2914 extra_spill_size
+= 8;
2919 /* If we have an odd number of words of pretend arguments written to
2920 the stack, then the FR save area will be unaligned. We round the
2921 size of this area up to keep things 16 byte aligned. */
2923 pretend_args_size
= IA64_STACK_ALIGN (crtl
->args
.pretend_args_size
);
2925 pretend_args_size
= crtl
->args
.pretend_args_size
;
2927 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
2928 + crtl
->outgoing_args_size
);
2929 total_size
= IA64_STACK_ALIGN (total_size
);
2931 /* We always use the 16-byte scratch area provided by the caller, but
2932 if we are a leaf function, there's no one to which we need to provide
2933 a scratch area. However, if the function allocates dynamic stack space,
2934 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2935 so we need to cope. */
2936 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
2937 total_size
= MAX (0, total_size
- 16);
2939 current_frame_info
.total_size
= total_size
;
2940 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2941 current_frame_info
.spill_size
= spill_size
;
2942 current_frame_info
.extra_spill_size
= extra_spill_size
;
2943 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2944 current_frame_info
.n_spilled
= n_spilled
;
2945 current_frame_info
.initialized
= reload_completed
;
2948 /* Worker function for TARGET_CAN_ELIMINATE. */
2951 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED
, const int to
)
2953 return (to
== BR_REG (0) ? crtl
->is_leaf
: true);
2956 /* Compute the initial difference between the specified pair of registers. */
2959 ia64_initial_elimination_offset (int from
, int to
)
2961 HOST_WIDE_INT offset
;
2963 ia64_compute_frame_size (get_frame_size ());
2966 case FRAME_POINTER_REGNUM
:
2969 case HARD_FRAME_POINTER_REGNUM
:
2970 offset
= -current_frame_info
.total_size
;
2971 if (!crtl
->is_leaf
|| cfun
->calls_alloca
)
2972 offset
+= 16 + crtl
->outgoing_args_size
;
2975 case STACK_POINTER_REGNUM
:
2977 if (!crtl
->is_leaf
|| cfun
->calls_alloca
)
2978 offset
+= 16 + crtl
->outgoing_args_size
;
2986 case ARG_POINTER_REGNUM
:
2987 /* Arguments start above the 16 byte save area, unless stdarg
2988 in which case we store through the 16 byte save area. */
2991 case HARD_FRAME_POINTER_REGNUM
:
2992 offset
= 16 - crtl
->args
.pretend_args_size
;
2995 case STACK_POINTER_REGNUM
:
2996 offset
= (current_frame_info
.total_size
2997 + 16 - crtl
->args
.pretend_args_size
);
3012 /* If there are more than a trivial number of register spills, we use
3013 two interleaved iterators so that we can get two memory references
3016 In order to simplify things in the prologue and epilogue expanders,
3017 we use helper functions to fix up the memory references after the
3018 fact with the appropriate offsets to a POST_MODIFY memory mode.
3019 The following data structure tracks the state of the two iterators
3020 while insns are being emitted. */
3022 struct spill_fill_data
3024 rtx_insn
*init_after
; /* point at which to emit initializations */
3025 rtx init_reg
[2]; /* initial base register */
3026 rtx iter_reg
[2]; /* the iterator registers */
3027 rtx
*prev_addr
[2]; /* address of last memory use */
3028 rtx_insn
*prev_insn
[2]; /* the insn corresponding to prev_addr */
3029 HOST_WIDE_INT prev_off
[2]; /* last offset */
3030 int n_iter
; /* number of iterators in use */
3031 int next_iter
; /* next iterator to use */
3032 unsigned int save_gr_used_mask
;
3035 static struct spill_fill_data spill_fill_data
;
3038 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
3042 spill_fill_data
.init_after
= get_last_insn ();
3043 spill_fill_data
.init_reg
[0] = init_reg
;
3044 spill_fill_data
.init_reg
[1] = init_reg
;
3045 spill_fill_data
.prev_addr
[0] = NULL
;
3046 spill_fill_data
.prev_addr
[1] = NULL
;
3047 spill_fill_data
.prev_insn
[0] = NULL
;
3048 spill_fill_data
.prev_insn
[1] = NULL
;
3049 spill_fill_data
.prev_off
[0] = cfa_off
;
3050 spill_fill_data
.prev_off
[1] = cfa_off
;
3051 spill_fill_data
.next_iter
= 0;
3052 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
3054 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
3055 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
3057 int regno
= next_scratch_gr_reg ();
3058 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
3059 current_frame_info
.gr_used_mask
|= 1 << regno
;
3064 finish_spill_pointers (void)
3066 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
3070 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
3072 int iter
= spill_fill_data
.next_iter
;
3073 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
3074 rtx disp_rtx
= GEN_INT (disp
);
3077 if (spill_fill_data
.prev_addr
[iter
])
3079 if (satisfies_constraint_N (disp_rtx
))
3081 *spill_fill_data
.prev_addr
[iter
]
3082 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
3083 gen_rtx_PLUS (DImode
,
3084 spill_fill_data
.iter_reg
[iter
],
3086 add_reg_note (spill_fill_data
.prev_insn
[iter
],
3087 REG_INC
, spill_fill_data
.iter_reg
[iter
]);
3091 /* ??? Could use register post_modify for loads. */
3092 if (!satisfies_constraint_I (disp_rtx
))
3094 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3095 emit_move_insn (tmp
, disp_rtx
);
3098 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3099 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
3102 /* Micro-optimization: if we've created a frame pointer, it's at
3103 CFA 0, which may allow the real iterator to be initialized lower,
3104 slightly increasing parallelism. Also, if there are few saves
3105 it may eliminate the iterator entirely. */
3107 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
3108 && frame_pointer_needed
)
3110 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
3111 set_mem_alias_set (mem
, get_varargs_alias_set ());
3120 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
3121 spill_fill_data
.init_reg
[iter
]);
3126 if (!satisfies_constraint_I (disp_rtx
))
3128 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
3129 emit_move_insn (tmp
, disp_rtx
);
3133 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
3134 spill_fill_data
.init_reg
[iter
],
3141 /* Careful for being the first insn in a sequence. */
3142 if (spill_fill_data
.init_after
)
3143 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
3146 rtx_insn
*first
= get_insns ();
3148 insn
= emit_insn_before (seq
, first
);
3150 insn
= emit_insn (seq
);
3152 spill_fill_data
.init_after
= insn
;
3155 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
3157 /* ??? Not all of the spills are for varargs, but some of them are.
3158 The rest of the spills belong in an alias set of their own. But
3159 it doesn't actually hurt to include them here. */
3160 set_mem_alias_set (mem
, get_varargs_alias_set ());
3162 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
3163 spill_fill_data
.prev_off
[iter
] = cfa_off
;
3165 if (++iter
>= spill_fill_data
.n_iter
)
3167 spill_fill_data
.next_iter
= iter
;
3173 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
3176 int iter
= spill_fill_data
.next_iter
;
3180 mem
= spill_restore_mem (reg
, cfa_off
);
3181 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
3182 spill_fill_data
.prev_insn
[iter
] = insn
;
3189 RTX_FRAME_RELATED_P (insn
) = 1;
3191 /* Don't even pretend that the unwind code can intuit its way
3192 through a pair of interleaved post_modify iterators. Just
3193 provide the correct answer. */
3195 if (frame_pointer_needed
)
3197 base
= hard_frame_pointer_rtx
;
3202 base
= stack_pointer_rtx
;
3203 off
= current_frame_info
.total_size
- cfa_off
;
3206 add_reg_note (insn
, REG_CFA_OFFSET
,
3207 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg
),
3208 plus_constant (Pmode
,
3215 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
3217 int iter
= spill_fill_data
.next_iter
;
3220 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
3221 GEN_INT (cfa_off
)));
3222 spill_fill_data
.prev_insn
[iter
] = insn
;
3225 /* Wrapper functions that discards the CONST_INT spill offset. These
3226 exist so that we can give gr_spill/gr_fill the offset they need and
3227 use a consistent function interface. */
3230 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3232 return gen_movdi (dest
, src
);
3236 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3238 return gen_fr_spill (dest
, src
);
3242 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
3244 return gen_fr_restore (dest
, src
);
3247 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3249 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3250 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3253 inclusive. These are offsets from the current stack pointer. BS_SIZE
3254 is the size of the backing store. ??? This clobbers r2 and r3. */
3257 ia64_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
3260 rtx r2
= gen_rtx_REG (Pmode
, GR_REG (2));
3261 rtx r3
= gen_rtx_REG (Pmode
, GR_REG (3));
3262 rtx p6
= gen_rtx_REG (BImode
, PR_REG (6));
3264 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3265 of the Register Stack Engine. We also need to probe it after checking
3266 that the 2 stacks don't overlap. */
3267 emit_insn (gen_bsp_value (r3
));
3268 emit_move_insn (r2
, GEN_INT (-(first
+ size
)));
3270 /* Compare current value of BSP and SP registers. */
3271 emit_insn (gen_rtx_SET (p6
, gen_rtx_fmt_ee (LTU
, BImode
,
3272 r3
, stack_pointer_rtx
)));
3274 /* Compute the address of the probe for the Backing Store (which grows
3275 towards higher addresses). We probe only at the first offset of
3276 the next page because some OS (eg Linux/ia64) only extend the
3277 backing store when this specific address is hit (but generate a SEGV
3278 on other address). Page size is the worst case (4KB). The reserve
3279 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3280 Also compute the address of the last probe for the memory stack
3281 (which grows towards lower addresses). */
3282 emit_insn (gen_rtx_SET (r3
, plus_constant (Pmode
, r3
, 4095)));
3283 emit_insn (gen_rtx_SET (r2
, gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3285 /* Compare them and raise SEGV if the former has topped the latter. */
3286 emit_insn (gen_rtx_COND_EXEC (VOIDmode
,
3287 gen_rtx_fmt_ee (NE
, VOIDmode
, p6
, const0_rtx
),
3288 gen_rtx_SET (p6
, gen_rtx_fmt_ee (GEU
, BImode
,
3290 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode
, r3
, GEN_INT (12),
3293 emit_insn (gen_rtx_COND_EXEC (VOIDmode
,
3294 gen_rtx_fmt_ee (NE
, VOIDmode
, p6
, const0_rtx
),
3295 gen_rtx_TRAP_IF (VOIDmode
, const1_rtx
,
3298 /* Probe the Backing Store if necessary. */
3300 emit_stack_probe (r3
);
3302 /* Probe the memory stack if necessary. */
3306 /* See if we have a constant small number of probes to generate. If so,
3307 that's the easy case. */
3308 else if (size
<= PROBE_INTERVAL
)
3309 emit_stack_probe (r2
);
3311 /* The run-time loop is made up of 9 insns in the generic case while this
3312 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3313 else if (size
<= 4 * PROBE_INTERVAL
)
3317 emit_move_insn (r2
, GEN_INT (-(first
+ PROBE_INTERVAL
)));
3318 emit_insn (gen_rtx_SET (r2
,
3319 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3320 emit_stack_probe (r2
);
3322 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3323 it exceeds SIZE. If only two probes are needed, this will not
3324 generate any code. Then probe at FIRST + SIZE. */
3325 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
3327 emit_insn (gen_rtx_SET (r2
,
3328 plus_constant (Pmode
, r2
, -PROBE_INTERVAL
)));
3329 emit_stack_probe (r2
);
3332 emit_insn (gen_rtx_SET (r2
,
3333 plus_constant (Pmode
, r2
,
3334 (i
- PROBE_INTERVAL
) - size
)));
3335 emit_stack_probe (r2
);
3338 /* Otherwise, do the same as above, but in a loop. Note that we must be
3339 extra careful with variables wrapping around because we might be at
3340 the very top (or the very bottom) of the address space and we have
3341 to be able to handle this case properly; in particular, we use an
3342 equality test for the loop condition. */
3345 HOST_WIDE_INT rounded_size
;
3347 emit_move_insn (r2
, GEN_INT (-first
));
3350 /* Step 1: round SIZE to the previous multiple of the interval. */
3352 rounded_size
= size
& -PROBE_INTERVAL
;
3355 /* Step 2: compute initial and final value of the loop counter. */
3357 /* TEST_ADDR = SP + FIRST. */
3358 emit_insn (gen_rtx_SET (r2
,
3359 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, r2
)));
3361 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3362 if (rounded_size
> (1 << 21))
3364 emit_move_insn (r3
, GEN_INT (-rounded_size
));
3365 emit_insn (gen_rtx_SET (r3
, gen_rtx_PLUS (Pmode
, r2
, r3
)));
3368 emit_insn (gen_rtx_SET (r3
, gen_rtx_PLUS (Pmode
, r2
,
3369 GEN_INT (-rounded_size
))));
3376 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3379 while (TEST_ADDR != LAST_ADDR)
3381 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3382 until it is equal to ROUNDED_SIZE. */
3384 emit_insn (gen_probe_stack_range (r2
, r2
, r3
));
3387 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3388 that SIZE is equal to ROUNDED_SIZE. */
3390 /* TEMP = SIZE - ROUNDED_SIZE. */
3391 if (size
!= rounded_size
)
3393 emit_insn (gen_rtx_SET (r2
, plus_constant (Pmode
, r2
,
3394 rounded_size
- size
)));
3395 emit_stack_probe (r2
);
3399 /* Make sure nothing is scheduled before we are done. */
3400 emit_insn (gen_blockage ());
3403 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3404 absolute addresses. */
3407 output_probe_stack_range (rtx reg1
, rtx reg2
)
3409 static int labelno
= 0;
3413 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
3416 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
3418 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3420 xops
[1] = GEN_INT (-PROBE_INTERVAL
);
3421 output_asm_insn ("addl %0 = %1, %0", xops
);
3422 fputs ("\t;;\n", asm_out_file
);
3424 /* Probe at TEST_ADDR. */
3425 output_asm_insn ("probe.w.fault %0, 0", xops
);
3427 /* Test if TEST_ADDR == LAST_ADDR. */
3429 xops
[2] = gen_rtx_REG (BImode
, PR_REG (6));
3430 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops
);
3433 fprintf (asm_out_file
, "\t(%s) br.cond.dpnt ", reg_names
[PR_REG (7)]);
3434 assemble_name_raw (asm_out_file
, loop_lab
);
3435 fputc ('\n', asm_out_file
);
3440 /* Called after register allocation to add any instructions needed for the
3441 prologue. Using a prologue insn is favored compared to putting all of the
3442 instructions in output_function_prologue(), since it allows the scheduler
3443 to intermix instructions with the saves of the caller saved registers. In
3444 some cases, it might be necessary to emit a barrier instruction as the last
3445 insn to prevent such scheduling.
3447 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3448 so that the debug info generation code can handle them properly.
3450 The register save area is laid out like so:
3452 [ varargs spill area ]
3453 [ fr register spill area ]
3454 [ br register spill area ]
3455 [ ar register spill area ]
3456 [ pr register spill area ]
3457 [ gr register spill area ] */
3459 /* ??? Get inefficient code when the frame size is larger than can fit in an
3460 adds instruction. */
3463 ia64_expand_prologue (void)
3466 rtx ar_pfs_save_reg
, ar_unat_save_reg
;
3467 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
3470 ia64_compute_frame_size (get_frame_size ());
3471 last_scratch_gr_reg
= 15;
3473 if (flag_stack_usage_info
)
3474 current_function_static_stack_size
= current_frame_info
.total_size
;
3476 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
3478 HOST_WIDE_INT size
= current_frame_info
.total_size
;
3479 int bs_size
= BACKING_STORE_SIZE (current_frame_info
.n_input_regs
3480 + current_frame_info
.n_local_regs
);
3482 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
3484 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
3485 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT
,
3486 size
- STACK_CHECK_PROTECT
,
3488 else if (size
+ bs_size
> STACK_CHECK_PROTECT
)
3489 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT
, 0, bs_size
);
3491 else if (size
+ bs_size
> 0)
3492 ia64_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
, bs_size
);
3497 fprintf (dump_file
, "ia64 frame related registers "
3498 "recorded in current_frame_info.r[]:\n");
3499 #define PRINTREG(a) if (current_frame_info.r[a]) \
3500 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3502 PRINTREG(reg_save_b0
);
3503 PRINTREG(reg_save_pr
);
3504 PRINTREG(reg_save_ar_pfs
);
3505 PRINTREG(reg_save_ar_unat
);
3506 PRINTREG(reg_save_ar_lc
);
3507 PRINTREG(reg_save_gp
);
3511 /* If there is no epilogue, then we don't need some prologue insns.
3512 We need to avoid emitting the dead prologue insns, because flow
3513 will complain about them. */
3519 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
3520 if ((e
->flags
& EDGE_FAKE
) == 0
3521 && (e
->flags
& EDGE_FALLTHRU
) != 0)
3523 epilogue_p
= (e
!= NULL
);
3528 /* Set the local, input, and output register names. We need to do this
3529 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3530 half. If we use in/loc/out register names, then we get assembler errors
3531 in crtn.S because there is no alloc insn or regstk directive in there. */
3532 if (! TARGET_REG_NAMES
)
3534 int inputs
= current_frame_info
.n_input_regs
;
3535 int locals
= current_frame_info
.n_local_regs
;
3536 int outputs
= current_frame_info
.n_output_regs
;
3538 for (i
= 0; i
< inputs
; i
++)
3539 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
3540 for (i
= 0; i
< locals
; i
++)
3541 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
3542 for (i
= 0; i
< outputs
; i
++)
3543 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
3546 /* Set the frame pointer register name. The regnum is logically loc79,
3547 but of course we'll not have allocated that many locals. Rather than
3548 worrying about renumbering the existing rtxs, we adjust the name. */
3549 /* ??? This code means that we can never use one local register when
3550 there is a frame pointer. loc79 gets wasted in this case, as it is
3551 renamed to a register that will never be used. See also the try_locals
3552 code in find_gr_spill. */
3553 if (current_frame_info
.r
[reg_fp
])
3555 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3556 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3557 = reg_names
[current_frame_info
.r
[reg_fp
]];
3558 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
3561 /* We don't need an alloc instruction if we've used no outputs or locals. */
3562 if (current_frame_info
.n_local_regs
== 0
3563 && current_frame_info
.n_output_regs
== 0
3564 && current_frame_info
.n_input_regs
<= crtl
->args
.info
.int_regs
3565 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3567 /* If there is no alloc, but there are input registers used, then we
3568 need a .regstk directive. */
3569 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
3570 ar_pfs_save_reg
= NULL_RTX
;
3574 current_frame_info
.need_regstk
= 0;
3576 if (current_frame_info
.r
[reg_save_ar_pfs
])
3578 regno
= current_frame_info
.r
[reg_save_ar_pfs
];
3579 reg_emitted (reg_save_ar_pfs
);
3582 regno
= next_scratch_gr_reg ();
3583 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
3585 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
3586 GEN_INT (current_frame_info
.n_input_regs
),
3587 GEN_INT (current_frame_info
.n_local_regs
),
3588 GEN_INT (current_frame_info
.n_output_regs
),
3589 GEN_INT (current_frame_info
.n_rotate_regs
)));
3590 if (current_frame_info
.r
[reg_save_ar_pfs
])
3592 RTX_FRAME_RELATED_P (insn
) = 1;
3593 add_reg_note (insn
, REG_CFA_REGISTER
,
3594 gen_rtx_SET (ar_pfs_save_reg
,
3595 gen_rtx_REG (DImode
, AR_PFS_REGNUM
)));
3599 /* Set up frame pointer, stack pointer, and spill iterators. */
3601 n_varargs
= cfun
->machine
->n_varargs
;
3602 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
3603 stack_pointer_rtx
, 0);
3605 if (frame_pointer_needed
)
3607 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3608 RTX_FRAME_RELATED_P (insn
) = 1;
3610 /* Force the unwind info to recognize this as defining a new CFA,
3611 rather than some temp register setup. */
3612 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL_RTX
);
3615 if (current_frame_info
.total_size
!= 0)
3617 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
3620 if (satisfies_constraint_I (frame_size_rtx
))
3621 offset
= frame_size_rtx
;
3624 regno
= next_scratch_gr_reg ();
3625 offset
= gen_rtx_REG (DImode
, regno
);
3626 emit_move_insn (offset
, frame_size_rtx
);
3629 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
3630 stack_pointer_rtx
, offset
));
3632 if (! frame_pointer_needed
)
3634 RTX_FRAME_RELATED_P (insn
) = 1;
3635 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
3636 gen_rtx_SET (stack_pointer_rtx
,
3637 gen_rtx_PLUS (DImode
,
3642 /* ??? At this point we must generate a magic insn that appears to
3643 modify the stack pointer, the frame pointer, and all spill
3644 iterators. This would allow the most scheduling freedom. For
3645 now, just hard stop. */
3646 emit_insn (gen_blockage ());
3649 /* Must copy out ar.unat before doing any integer spills. */
3650 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3652 if (current_frame_info
.r
[reg_save_ar_unat
])
3655 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3656 reg_emitted (reg_save_ar_unat
);
3660 alt_regno
= next_scratch_gr_reg ();
3661 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3662 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3665 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3666 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
3667 if (current_frame_info
.r
[reg_save_ar_unat
])
3669 RTX_FRAME_RELATED_P (insn
) = 1;
3670 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3673 /* Even if we're not going to generate an epilogue, we still
3674 need to save the register so that EH works. */
3675 if (! epilogue_p
&& current_frame_info
.r
[reg_save_ar_unat
])
3676 emit_insn (gen_prologue_use (ar_unat_save_reg
));
3679 ar_unat_save_reg
= NULL_RTX
;
3681 /* Spill all varargs registers. Do this before spilling any GR registers,
3682 since we want the UNAT bits for the GR registers to override the UNAT
3683 bits from varargs, which we don't care about. */
3686 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
3688 reg
= gen_rtx_REG (DImode
, regno
);
3689 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
3692 /* Locate the bottom of the register save area. */
3693 cfa_off
= (current_frame_info
.spill_cfa_off
3694 + current_frame_info
.spill_size
3695 + current_frame_info
.extra_spill_size
);
3697 /* Save the predicate register block either in a register or in memory. */
3698 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3700 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3701 if (current_frame_info
.r
[reg_save_pr
] != 0)
3703 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3704 reg_emitted (reg_save_pr
);
3705 insn
= emit_move_insn (alt_reg
, reg
);
3707 /* ??? Denote pr spill/fill by a DImode move that modifies all
3708 64 hard registers. */
3709 RTX_FRAME_RELATED_P (insn
) = 1;
3710 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3712 /* Even if we're not going to generate an epilogue, we still
3713 need to save the register so that EH works. */
3715 emit_insn (gen_prologue_use (alt_reg
));
3719 alt_regno
= next_scratch_gr_reg ();
3720 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3721 insn
= emit_move_insn (alt_reg
, reg
);
3722 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3727 /* Handle AR regs in numerical order. All of them get special handling. */
3728 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
3729 && current_frame_info
.r
[reg_save_ar_unat
] == 0)
3731 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
3732 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
3736 /* The alloc insn already copied ar.pfs into a general register. The
3737 only thing we have to do now is copy that register to a stack slot
3738 if we'd not allocated a local register for the job. */
3739 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
3740 && current_frame_info
.r
[reg_save_ar_pfs
] == 0)
3742 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3743 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
3747 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3749 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3750 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3752 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3753 reg_emitted (reg_save_ar_lc
);
3754 insn
= emit_move_insn (alt_reg
, reg
);
3755 RTX_FRAME_RELATED_P (insn
) = 1;
3756 add_reg_note (insn
, REG_CFA_REGISTER
, NULL_RTX
);
3758 /* Even if we're not going to generate an epilogue, we still
3759 need to save the register so that EH works. */
3761 emit_insn (gen_prologue_use (alt_reg
));
3765 alt_regno
= next_scratch_gr_reg ();
3766 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3767 emit_move_insn (alt_reg
, reg
);
3768 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3773 /* Save the return pointer. */
3774 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3776 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3777 if (current_frame_info
.r
[reg_save_b0
] != 0)
3779 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3780 reg_emitted (reg_save_b0
);
3781 insn
= emit_move_insn (alt_reg
, reg
);
3782 RTX_FRAME_RELATED_P (insn
) = 1;
3783 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (alt_reg
, pc_rtx
));
3785 /* Even if we're not going to generate an epilogue, we still
3786 need to save the register so that EH works. */
3788 emit_insn (gen_prologue_use (alt_reg
));
3792 alt_regno
= next_scratch_gr_reg ();
3793 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3794 emit_move_insn (alt_reg
, reg
);
3795 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3800 if (current_frame_info
.r
[reg_save_gp
])
3802 reg_emitted (reg_save_gp
);
3803 insn
= emit_move_insn (gen_rtx_REG (DImode
,
3804 current_frame_info
.r
[reg_save_gp
]),
3805 pic_offset_table_rtx
);
3808 /* We should now be at the base of the gr/br/fr spill area. */
3809 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
3810 + current_frame_info
.spill_size
));
3812 /* Spill all general registers. */
3813 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
3814 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3816 reg
= gen_rtx_REG (DImode
, regno
);
3817 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
3821 /* Spill the rest of the BR registers. */
3822 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
3823 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3825 alt_regno
= next_scratch_gr_reg ();
3826 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3827 reg
= gen_rtx_REG (DImode
, regno
);
3828 emit_move_insn (alt_reg
, reg
);
3829 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
3833 /* Align the frame and spill all FR registers. */
3834 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
3835 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
3837 gcc_assert (!(cfa_off
& 15));
3838 reg
= gen_rtx_REG (XFmode
, regno
);
3839 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
3843 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
3845 finish_spill_pointers ();
3848 /* Output the textual info surrounding the prologue. */
3851 ia64_start_function (FILE *file
, const char *fnname
,
3852 tree decl ATTRIBUTE_UNUSED
)
3854 #if TARGET_ABI_OPEN_VMS
3855 vms_start_function (fnname
);
3858 fputs ("\t.proc ", file
);
3859 assemble_name (file
, fnname
);
3861 ASM_OUTPUT_LABEL (file
, fnname
);
3864 /* Called after register allocation to add any instructions needed for the
3865 epilogue. Using an epilogue insn is favored compared to putting all of the
3866 instructions in output_function_prologue(), since it allows the scheduler
3867 to intermix instructions with the saves of the caller saved registers. In
3868 some cases, it might be necessary to emit a barrier instruction as the last
3869 insn to prevent such scheduling. */
3872 ia64_expand_epilogue (int sibcall_p
)
3875 rtx reg
, alt_reg
, ar_unat_save_reg
;
3876 int regno
, alt_regno
, cfa_off
;
3878 ia64_compute_frame_size (get_frame_size ());
3880 /* If there is a frame pointer, then we use it instead of the stack
3881 pointer, so that the stack pointer does not need to be valid when
3882 the epilogue starts. See EXIT_IGNORE_STACK. */
3883 if (frame_pointer_needed
)
3884 setup_spill_pointers (current_frame_info
.n_spilled
,
3885 hard_frame_pointer_rtx
, 0);
3887 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3888 current_frame_info
.total_size
);
3890 if (current_frame_info
.total_size
!= 0)
3892 /* ??? At this point we must generate a magic insn that appears to
3893 modify the spill iterators and the frame pointer. This would
3894 allow the most scheduling freedom. For now, just hard stop. */
3895 emit_insn (gen_blockage ());
3898 /* Locate the bottom of the register save area. */
3899 cfa_off
= (current_frame_info
.spill_cfa_off
3900 + current_frame_info
.spill_size
3901 + current_frame_info
.extra_spill_size
);
3903 /* Restore the predicate registers. */
3904 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3906 if (current_frame_info
.r
[reg_save_pr
] != 0)
3908 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3909 reg_emitted (reg_save_pr
);
3913 alt_regno
= next_scratch_gr_reg ();
3914 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3915 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3918 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3919 emit_move_insn (reg
, alt_reg
);
3922 /* Restore the application registers. */
3924 /* Load the saved unat from the stack, but do not restore it until
3925 after the GRs have been restored. */
3926 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3928 if (current_frame_info
.r
[reg_save_ar_unat
] != 0)
3931 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3932 reg_emitted (reg_save_ar_unat
);
3936 alt_regno
= next_scratch_gr_reg ();
3937 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3938 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3939 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3944 ar_unat_save_reg
= NULL_RTX
;
3946 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0)
3948 reg_emitted (reg_save_ar_pfs
);
3949 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_pfs
]);
3950 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3951 emit_move_insn (reg
, alt_reg
);
3953 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3955 alt_regno
= next_scratch_gr_reg ();
3956 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3957 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3959 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3960 emit_move_insn (reg
, alt_reg
);
3963 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
3965 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
3967 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
3968 reg_emitted (reg_save_ar_lc
);
3972 alt_regno
= next_scratch_gr_reg ();
3973 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3974 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3977 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
3978 emit_move_insn (reg
, alt_reg
);
3981 /* Restore the return pointer. */
3982 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
3984 if (current_frame_info
.r
[reg_save_b0
] != 0)
3986 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
3987 reg_emitted (reg_save_b0
);
3991 alt_regno
= next_scratch_gr_reg ();
3992 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3993 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3996 reg
= gen_rtx_REG (DImode
, BR_REG (0));
3997 emit_move_insn (reg
, alt_reg
);
4000 /* We should now be at the base of the gr/br/fr spill area. */
4001 gcc_assert (cfa_off
== (current_frame_info
.spill_cfa_off
4002 + current_frame_info
.spill_size
));
4004 /* The GP may be stored on the stack in the prologue, but it's
4005 never restored in the epilogue. Skip the stack slot. */
4006 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
4009 /* Restore all general registers. */
4010 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
4011 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
4013 reg
= gen_rtx_REG (DImode
, regno
);
4014 do_restore (gen_gr_restore
, reg
, cfa_off
);
4018 /* Restore the branch registers. */
4019 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
4020 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
4022 alt_regno
= next_scratch_gr_reg ();
4023 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
4024 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
4026 reg
= gen_rtx_REG (DImode
, regno
);
4027 emit_move_insn (reg
, alt_reg
);
4030 /* Restore floating point registers. */
4031 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
4032 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
4034 gcc_assert (!(cfa_off
& 15));
4035 reg
= gen_rtx_REG (XFmode
, regno
);
4036 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
4040 /* Restore ar.unat for real. */
4041 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
4043 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
4044 emit_move_insn (reg
, ar_unat_save_reg
);
4047 gcc_assert (cfa_off
== current_frame_info
.spill_cfa_off
);
4049 finish_spill_pointers ();
4051 if (current_frame_info
.total_size
4052 || cfun
->machine
->ia64_eh_epilogue_sp
4053 || frame_pointer_needed
)
4055 /* ??? At this point we must generate a magic insn that appears to
4056 modify the spill iterators, the stack pointer, and the frame
4057 pointer. This would allow the most scheduling freedom. For now,
4059 emit_insn (gen_blockage ());
4062 if (cfun
->machine
->ia64_eh_epilogue_sp
)
4063 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
4064 else if (frame_pointer_needed
)
4066 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
4067 RTX_FRAME_RELATED_P (insn
) = 1;
4068 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
4070 else if (current_frame_info
.total_size
)
4072 rtx offset
, frame_size_rtx
;
4074 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
4075 if (satisfies_constraint_I (frame_size_rtx
))
4076 offset
= frame_size_rtx
;
4079 regno
= next_scratch_gr_reg ();
4080 offset
= gen_rtx_REG (DImode
, regno
);
4081 emit_move_insn (offset
, frame_size_rtx
);
4084 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
4087 RTX_FRAME_RELATED_P (insn
) = 1;
4088 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
4089 gen_rtx_SET (stack_pointer_rtx
,
4090 gen_rtx_PLUS (DImode
,
4095 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
4096 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
4099 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
4102 int fp
= GR_REG (2);
4103 /* We need a throw away register here, r0 and r1 are reserved,
4104 so r2 is the first available call clobbered register. If
4105 there was a frame_pointer register, we may have swapped the
4106 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4107 sure we're using the string "r2" when emitting the register
4108 name for the assembler. */
4109 if (current_frame_info
.r
[reg_fp
]
4110 && current_frame_info
.r
[reg_fp
] == GR_REG (2))
4111 fp
= HARD_FRAME_POINTER_REGNUM
;
4113 /* We must emit an alloc to force the input registers to become output
4114 registers. Otherwise, if the callee tries to pass its parameters
4115 through to another call without an intervening alloc, then these
4117 /* ??? We don't need to preserve all input registers. We only need to
4118 preserve those input registers used as arguments to the sibling call.
4119 It is unclear how to compute that number here. */
4120 if (current_frame_info
.n_input_regs
!= 0)
4122 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
4124 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
4125 const0_rtx
, const0_rtx
,
4126 n_inputs
, const0_rtx
));
4127 RTX_FRAME_RELATED_P (insn
) = 1;
4129 /* ??? We need to mark the alloc as frame-related so that it gets
4130 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4131 But there's nothing dwarf2 related to be done wrt the register
4132 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4133 the empty parallel means dwarf2out will not see anything. */
4134 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4135 gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (0)));
4140 /* Return 1 if br.ret can do all the work required to return from a
4144 ia64_direct_return (void)
4146 if (reload_completed
&& ! frame_pointer_needed
)
4148 ia64_compute_frame_size (get_frame_size ());
4150 return (current_frame_info
.total_size
== 0
4151 && current_frame_info
.n_spilled
== 0
4152 && current_frame_info
.r
[reg_save_b0
] == 0
4153 && current_frame_info
.r
[reg_save_pr
] == 0
4154 && current_frame_info
.r
[reg_save_ar_pfs
] == 0
4155 && current_frame_info
.r
[reg_save_ar_unat
] == 0
4156 && current_frame_info
.r
[reg_save_ar_lc
] == 0);
4161 /* Return the magic cookie that we use to hold the return address
4162 during early compilation. */
4165 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
4169 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
4172 /* Split this value after reload, now that we know where the return
4173 address is saved. */
4176 ia64_split_return_addr_rtx (rtx dest
)
4180 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
4182 if (current_frame_info
.r
[reg_save_b0
] != 0)
4184 src
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_b0
]);
4185 reg_emitted (reg_save_b0
);
4193 /* Compute offset from CFA for BR0. */
4194 /* ??? Must be kept in sync with ia64_expand_prologue. */
4195 off
= (current_frame_info
.spill_cfa_off
4196 + current_frame_info
.spill_size
);
4197 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
4198 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
4201 /* Convert CFA offset to a register based offset. */
4202 if (frame_pointer_needed
)
4203 src
= hard_frame_pointer_rtx
;
4206 src
= stack_pointer_rtx
;
4207 off
+= current_frame_info
.total_size
;
4210 /* Load address into scratch register. */
4211 off_r
= GEN_INT (off
);
4212 if (satisfies_constraint_I (off_r
))
4213 emit_insn (gen_adddi3 (dest
, src
, off_r
));
4216 emit_move_insn (dest
, off_r
);
4217 emit_insn (gen_adddi3 (dest
, src
, dest
));
4220 src
= gen_rtx_MEM (Pmode
, dest
);
4224 src
= gen_rtx_REG (DImode
, BR_REG (0));
4226 emit_move_insn (dest
, src
);
4230 ia64_hard_regno_rename_ok (int from
, int to
)
4232 /* Don't clobber any of the registers we reserved for the prologue. */
4235 for (r
= reg_fp
; r
<= reg_save_ar_lc
; r
++)
4236 if (to
== current_frame_info
.r
[r
]
4237 || from
== current_frame_info
.r
[r
]
4238 || to
== emitted_frame_related_regs
[r
]
4239 || from
== emitted_frame_related_regs
[r
])
4242 /* Don't use output registers outside the register frame. */
4243 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
4246 /* Retain even/oddness on predicate register pairs. */
4247 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
4248 return (from
& 1) == (to
& 1);
4253 /* Target hook for assembling integer objects. Handle word-sized
4254 aligned objects and detect the cases when @fptr is needed. */
4257 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4259 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
4260 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
4261 && GET_CODE (x
) == SYMBOL_REF
4262 && SYMBOL_REF_FUNCTION_P (x
))
4264 static const char * const directive
[2][2] = {
4265 /* 64-bit pointer */ /* 32-bit pointer */
4266 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4267 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4269 fputs (directive
[(aligned_p
!= 0)][POINTER_SIZE
== 32], asm_out_file
);
4270 output_addr_const (asm_out_file
, x
);
4271 fputs (")\n", asm_out_file
);
4274 return default_assemble_integer (x
, size
, aligned_p
);
4277 /* Emit the function prologue. */
4280 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4282 int mask
, grsave
, grsave_prev
;
4284 if (current_frame_info
.need_regstk
)
4285 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
4286 current_frame_info
.n_input_regs
,
4287 current_frame_info
.n_local_regs
,
4288 current_frame_info
.n_output_regs
,
4289 current_frame_info
.n_rotate_regs
);
4291 if (ia64_except_unwind_info (&global_options
) != UI_TARGET
)
4294 /* Emit the .prologue directive. */
4297 grsave
= grsave_prev
= 0;
4298 if (current_frame_info
.r
[reg_save_b0
] != 0)
4301 grsave
= grsave_prev
= current_frame_info
.r
[reg_save_b0
];
4303 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0
4304 && (grsave_prev
== 0
4305 || current_frame_info
.r
[reg_save_ar_pfs
] == grsave_prev
+ 1))
4308 if (grsave_prev
== 0)
4309 grsave
= current_frame_info
.r
[reg_save_ar_pfs
];
4310 grsave_prev
= current_frame_info
.r
[reg_save_ar_pfs
];
4312 if (current_frame_info
.r
[reg_fp
] != 0
4313 && (grsave_prev
== 0
4314 || current_frame_info
.r
[reg_fp
] == grsave_prev
+ 1))
4317 if (grsave_prev
== 0)
4318 grsave
= HARD_FRAME_POINTER_REGNUM
;
4319 grsave_prev
= current_frame_info
.r
[reg_fp
];
4321 if (current_frame_info
.r
[reg_save_pr
] != 0
4322 && (grsave_prev
== 0
4323 || current_frame_info
.r
[reg_save_pr
] == grsave_prev
+ 1))
4326 if (grsave_prev
== 0)
4327 grsave
= current_frame_info
.r
[reg_save_pr
];
4330 if (mask
&& TARGET_GNU_AS
)
4331 fprintf (file
, "\t.prologue %d, %d\n", mask
,
4332 ia64_dbx_register_number (grsave
));
4334 fputs ("\t.prologue\n", file
);
4336 /* Emit a .spill directive, if necessary, to relocate the base of
4337 the register spill area. */
4338 if (current_frame_info
.spill_cfa_off
!= -16)
4339 fprintf (file
, "\t.spill %ld\n",
4340 (long) (current_frame_info
.spill_cfa_off
4341 + current_frame_info
.spill_size
));
4344 /* Emit the .body directive at the scheduled end of the prologue. */
4347 ia64_output_function_end_prologue (FILE *file
)
4349 if (ia64_except_unwind_info (&global_options
) != UI_TARGET
)
4352 fputs ("\t.body\n", file
);
4355 /* Emit the function epilogue. */
4358 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4359 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4363 if (current_frame_info
.r
[reg_fp
])
4365 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
4366 reg_names
[HARD_FRAME_POINTER_REGNUM
]
4367 = reg_names
[current_frame_info
.r
[reg_fp
]];
4368 reg_names
[current_frame_info
.r
[reg_fp
]] = tmp
;
4369 reg_emitted (reg_fp
);
4371 if (! TARGET_REG_NAMES
)
4373 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
4374 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
4375 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
4376 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
4377 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
4378 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
4381 current_frame_info
.initialized
= 0;
4385 ia64_dbx_register_number (int regno
)
4387 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4388 from its home at loc79 to something inside the register frame. We
4389 must perform the same renumbering here for the debug info. */
4390 if (current_frame_info
.r
[reg_fp
])
4392 if (regno
== HARD_FRAME_POINTER_REGNUM
)
4393 regno
= current_frame_info
.r
[reg_fp
];
4394 else if (regno
== current_frame_info
.r
[reg_fp
])
4395 regno
= HARD_FRAME_POINTER_REGNUM
;
4398 if (IN_REGNO_P (regno
))
4399 return 32 + regno
- IN_REG (0);
4400 else if (LOC_REGNO_P (regno
))
4401 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
4402 else if (OUT_REGNO_P (regno
))
4403 return (32 + current_frame_info
.n_input_regs
4404 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
4409 /* Implement TARGET_TRAMPOLINE_INIT.
4411 The trampoline should set the static chain pointer to value placed
4412 into the trampoline and should branch to the specified routine.
4413 To make the normal indirect-subroutine calling convention work,
4414 the trampoline must look like a function descriptor; the first
4415 word being the target address and the second being the target's
4418 We abuse the concept of a global pointer by arranging for it
4419 to point to the data we need to load. The complete trampoline
4420 has the following form:
4422 +-------------------+ \
4423 TRAMP: | __ia64_trampoline | |
4424 +-------------------+ > fake function descriptor
4426 +-------------------+ /
4427 | target descriptor |
4428 +-------------------+
4430 +-------------------+
4434 ia64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx static_chain
)
4436 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4437 rtx addr
, addr_reg
, tramp
, eight
= GEN_INT (8);
4439 /* The Intel assembler requires that the global __ia64_trampoline symbol
4440 be declared explicitly */
4443 static bool declared_ia64_trampoline
= false;
4445 if (!declared_ia64_trampoline
)
4447 declared_ia64_trampoline
= true;
4448 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
4449 "__ia64_trampoline");
4453 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4454 addr
= convert_memory_address (Pmode
, XEXP (m_tramp
, 0));
4455 fnaddr
= convert_memory_address (Pmode
, fnaddr
);
4456 static_chain
= convert_memory_address (Pmode
, static_chain
);
4458 /* Load up our iterator. */
4459 addr_reg
= copy_to_reg (addr
);
4460 m_tramp
= adjust_automodify_address (m_tramp
, Pmode
, addr_reg
, 0);
4462 /* The first two words are the fake descriptor:
4463 __ia64_trampoline, ADDR+16. */
4464 tramp
= gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline");
4465 if (TARGET_ABI_OPEN_VMS
)
4467 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4468 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4469 relocation against function symbols to make it identical to the
4470 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4471 strict ELF and dereference to get the bare code address. */
4472 rtx reg
= gen_reg_rtx (Pmode
);
4473 SYMBOL_REF_FLAGS (tramp
) |= SYMBOL_FLAG_FUNCTION
;
4474 emit_move_insn (reg
, tramp
);
4475 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
4478 emit_move_insn (m_tramp
, tramp
);
4479 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4480 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4482 emit_move_insn (m_tramp
, force_reg (Pmode
, plus_constant (Pmode
, addr
, 16)));
4483 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4484 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4486 /* The third word is the target descriptor. */
4487 emit_move_insn (m_tramp
, force_reg (Pmode
, fnaddr
));
4488 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
4489 m_tramp
= adjust_automodify_address (m_tramp
, VOIDmode
, NULL
, 8);
4491 /* The fourth word is the static chain. */
4492 emit_move_insn (m_tramp
, static_chain
);
4495 /* Do any needed setup for a variadic function. CUM has not been updated
4496 for the last named argument which has type TYPE and mode MODE.
4498 We generate the actual spill instructions during prologue generation. */
4501 ia64_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4502 tree type
, int * pretend_size
,
4503 int second_time ATTRIBUTE_UNUSED
)
4505 CUMULATIVE_ARGS next_cum
= *get_cumulative_args (cum
);
4507 /* Skip the current argument. */
4508 ia64_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
, 1);
4510 if (next_cum
.words
< MAX_ARGUMENT_SLOTS
)
4512 int n
= MAX_ARGUMENT_SLOTS
- next_cum
.words
;
4513 *pretend_size
= n
* UNITS_PER_WORD
;
4514 cfun
->machine
->n_varargs
= n
;
4518 /* Check whether TYPE is a homogeneous floating point aggregate. If
4519 it is, return the mode of the floating point type that appears
4520 in all leafs. If it is not, return VOIDmode.
4522 An aggregate is a homogeneous floating point aggregate is if all
4523 fields/elements in it have the same floating point type (e.g,
4524 SFmode). 128-bit quad-precision floats are excluded.
4526 Variable sized aggregates should never arrive here, since we should
4527 have already decided to pass them by reference. Top-level zero-sized
4528 aggregates are excluded because our parallels crash the middle-end. */
4531 hfa_element_mode (const_tree type
, bool nested
)
4533 machine_mode element_mode
= VOIDmode
;
4535 enum tree_code code
= TREE_CODE (type
);
4536 int know_element_mode
= 0;
4539 if (!nested
&& (!TYPE_SIZE (type
) || integer_zerop (TYPE_SIZE (type
))))
4544 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
4545 case BOOLEAN_TYPE
: case POINTER_TYPE
:
4546 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
4547 case LANG_TYPE
: case FUNCTION_TYPE
:
4550 /* Fortran complex types are supposed to be HFAs, so we need to handle
4551 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4554 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
4555 && TYPE_MODE (type
) != TCmode
)
4556 return GET_MODE_INNER (TYPE_MODE (type
));
4561 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4562 mode if this is contained within an aggregate. */
4563 if (nested
&& TYPE_MODE (type
) != TFmode
)
4564 return TYPE_MODE (type
);
4569 return hfa_element_mode (TREE_TYPE (type
), 1);
4573 case QUAL_UNION_TYPE
:
4574 for (t
= TYPE_FIELDS (type
); t
; t
= DECL_CHAIN (t
))
4576 if (TREE_CODE (t
) != FIELD_DECL
)
4579 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
4580 if (know_element_mode
)
4582 if (mode
!= element_mode
)
4585 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
4589 know_element_mode
= 1;
4590 element_mode
= mode
;
4593 return element_mode
;
4596 /* If we reach here, we probably have some front-end specific type
4597 that the backend doesn't know about. This can happen via the
4598 aggregate_value_p call in init_function_start. All we can do is
4599 ignore unknown tree types. */
4606 /* Return the number of words required to hold a quantity of TYPE and MODE
4607 when passed as an argument. */
4609 ia64_function_arg_words (const_tree type
, machine_mode mode
)
4613 if (mode
== BLKmode
)
4614 words
= int_size_in_bytes (type
);
4616 words
= GET_MODE_SIZE (mode
);
4618 return (words
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
; /* round up */
4621 /* Return the number of registers that should be skipped so the current
4622 argument (described by TYPE and WORDS) will be properly aligned.
4624 Integer and float arguments larger than 8 bytes start at the next
4625 even boundary. Aggregates larger than 8 bytes start at the next
4626 even boundary if the aggregate has 16 byte alignment. Note that
4627 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4628 but are still to be aligned in registers.
4630 ??? The ABI does not specify how to handle aggregates with
4631 alignment from 9 to 15 bytes, or greater than 16. We handle them
4632 all as if they had 16 byte alignment. Such aggregates can occur
4633 only if gcc extensions are used. */
4635 ia64_function_arg_offset (const CUMULATIVE_ARGS
*cum
,
4636 const_tree type
, int words
)
4638 /* No registers are skipped on VMS. */
4639 if (TARGET_ABI_OPEN_VMS
|| (cum
->words
& 1) == 0)
4643 && TREE_CODE (type
) != INTEGER_TYPE
4644 && TREE_CODE (type
) != REAL_TYPE
)
4645 return TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
;
4650 /* Return rtx for register where argument is passed, or zero if it is passed
4652 /* ??? 128-bit quad-precision floats are always passed in general
4656 ia64_function_arg_1 (cumulative_args_t cum_v
, machine_mode mode
,
4657 const_tree type
, bool named
, bool incoming
)
4659 const CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4661 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
4662 int words
= ia64_function_arg_words (type
, mode
);
4663 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4664 machine_mode hfa_mode
= VOIDmode
;
4666 /* For OPEN VMS, emit the instruction setting up the argument register here,
4667 when we know this will be together with the other arguments setup related
4668 insns. This is not the conceptually best place to do this, but this is
4669 the easiest as we have convenient access to cumulative args info. */
4671 if (TARGET_ABI_OPEN_VMS
&& mode
== VOIDmode
&& type
== void_type_node
4674 unsigned HOST_WIDE_INT regval
= cum
->words
;
4677 for (i
= 0; i
< 8; i
++)
4678 regval
|= ((int) cum
->atypes
[i
]) << (i
* 3 + 8);
4680 emit_move_insn (gen_rtx_REG (DImode
, GR_REG (25)),
4684 /* If all argument slots are used, then it must go on the stack. */
4685 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4688 /* On OpenVMS argument is either in Rn or Fn. */
4689 if (TARGET_ABI_OPEN_VMS
)
4691 if (FLOAT_MODE_P (mode
))
4692 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->words
);
4694 return gen_rtx_REG (mode
, basereg
+ cum
->words
);
4697 /* Check for and handle homogeneous FP aggregates. */
4699 hfa_mode
= hfa_element_mode (type
, 0);
4701 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4702 and unprototyped hfas are passed specially. */
4703 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4707 int fp_regs
= cum
->fp_regs
;
4708 int int_regs
= cum
->words
+ offset
;
4709 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4713 /* If prototyped, pass it in FR regs then GR regs.
4714 If not prototyped, pass it in both FR and GR regs.
4716 If this is an SFmode aggregate, then it is possible to run out of
4717 FR regs while GR regs are still left. In that case, we pass the
4718 remaining part in the GR regs. */
4720 /* Fill the FP regs. We do this always. We stop if we reach the end
4721 of the argument, the last FP register, or the last argument slot. */
4723 byte_size
= ((mode
== BLKmode
)
4724 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4725 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4727 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4728 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
4730 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4731 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
4735 args_byte_size
+= hfa_size
;
4739 /* If no prototype, then the whole thing must go in GR regs. */
4740 if (! cum
->prototype
)
4742 /* If this is an SFmode aggregate, then we might have some left over
4743 that needs to go in GR regs. */
4744 else if (byte_size
!= offset
)
4745 int_regs
+= offset
/ UNITS_PER_WORD
;
4747 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4749 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
4751 machine_mode gr_mode
= DImode
;
4752 unsigned int gr_size
;
4754 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4755 then this goes in a GR reg left adjusted/little endian, right
4756 adjusted/big endian. */
4757 /* ??? Currently this is handled wrong, because 4-byte hunks are
4758 always right adjusted/little endian. */
4761 /* If we have an even 4 byte hunk because the aggregate is a
4762 multiple of 4 bytes in size, then this goes in a GR reg right
4763 adjusted/little endian. */
4764 else if (byte_size
- offset
== 4)
4767 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4768 gen_rtx_REG (gr_mode
, (basereg
4772 gr_size
= GET_MODE_SIZE (gr_mode
);
4774 if (gr_size
== UNITS_PER_WORD
4775 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
4777 else if (gr_size
> UNITS_PER_WORD
)
4778 int_regs
+= gr_size
/ UNITS_PER_WORD
;
4780 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
4783 /* Integral and aggregates go in general registers. If we have run out of
4784 FR registers, then FP values must also go in general registers. This can
4785 happen when we have a SFmode HFA. */
4786 else if (mode
== TFmode
|| mode
== TCmode
4787 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4789 int byte_size
= ((mode
== BLKmode
)
4790 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4791 if (BYTES_BIG_ENDIAN
4792 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4793 && byte_size
< UNITS_PER_WORD
4796 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4797 gen_rtx_REG (DImode
,
4798 (basereg
+ cum
->words
4801 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
4804 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4808 /* If there is a prototype, then FP values go in a FR register when
4809 named, and in a GR register when unnamed. */
4810 else if (cum
->prototype
)
4813 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
4814 /* In big-endian mode, an anonymous SFmode value must be represented
4815 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4816 the value into the high half of the general register. */
4817 else if (BYTES_BIG_ENDIAN
&& mode
== SFmode
)
4818 return gen_rtx_PARALLEL (mode
,
4820 gen_rtx_EXPR_LIST (VOIDmode
,
4821 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
4824 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
4826 /* If there is no prototype, then FP values go in both FR and GR
4830 /* See comment above. */
4831 machine_mode inner_mode
=
4832 (BYTES_BIG_ENDIAN
&& mode
== SFmode
) ? DImode
: mode
;
4834 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4835 gen_rtx_REG (mode
, (FR_ARG_FIRST
4838 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4839 gen_rtx_REG (inner_mode
,
4840 (basereg
+ cum
->words
4844 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
4848 /* Implement TARGET_FUNCION_ARG target hook. */
4851 ia64_function_arg (cumulative_args_t cum
, machine_mode mode
,
4852 const_tree type
, bool named
)
4854 return ia64_function_arg_1 (cum
, mode
, type
, named
, false);
4857 /* Implement TARGET_FUNCION_INCOMING_ARG target hook. */
4860 ia64_function_incoming_arg (cumulative_args_t cum
,
4862 const_tree type
, bool named
)
4864 return ia64_function_arg_1 (cum
, mode
, type
, named
, true);
4867 /* Return number of bytes, at the beginning of the argument, that must be
4868 put in registers. 0 is the argument is entirely in registers or entirely
4872 ia64_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
4873 tree type
, bool named ATTRIBUTE_UNUSED
)
4875 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4877 int words
= ia64_function_arg_words (type
, mode
);
4878 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4880 /* If all argument slots are used, then it must go on the stack. */
4881 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4884 /* It doesn't matter whether the argument goes in FR or GR regs. If
4885 it fits within the 8 argument slots, then it goes entirely in
4886 registers. If it extends past the last argument slot, then the rest
4887 goes on the stack. */
4889 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
4892 return (MAX_ARGUMENT_SLOTS
- cum
->words
- offset
) * UNITS_PER_WORD
;
4895 /* Return ivms_arg_type based on machine_mode. */
4897 static enum ivms_arg_type
4898 ia64_arg_type (machine_mode mode
)
4911 /* Update CUM to point after this argument. This is patterned after
4912 ia64_function_arg. */
4915 ia64_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
4916 const_tree type
, bool named
)
4918 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4919 int words
= ia64_function_arg_words (type
, mode
);
4920 int offset
= ia64_function_arg_offset (cum
, type
, words
);
4921 machine_mode hfa_mode
= VOIDmode
;
4923 /* If all arg slots are already full, then there is nothing to do. */
4924 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
4926 cum
->words
+= words
+ offset
;
4930 cum
->atypes
[cum
->words
] = ia64_arg_type (mode
);
4931 cum
->words
+= words
+ offset
;
4933 /* On OpenVMS argument is either in Rn or Fn. */
4934 if (TARGET_ABI_OPEN_VMS
)
4936 cum
->int_regs
= cum
->words
;
4937 cum
->fp_regs
= cum
->words
;
4941 /* Check for and handle homogeneous FP aggregates. */
4943 hfa_mode
= hfa_element_mode (type
, 0);
4945 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4946 and unprototyped hfas are passed specially. */
4947 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
4949 int fp_regs
= cum
->fp_regs
;
4950 /* This is the original value of cum->words + offset. */
4951 int int_regs
= cum
->words
- words
;
4952 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4956 /* If prototyped, pass it in FR regs then GR regs.
4957 If not prototyped, pass it in both FR and GR regs.
4959 If this is an SFmode aggregate, then it is possible to run out of
4960 FR regs while GR regs are still left. In that case, we pass the
4961 remaining part in the GR regs. */
4963 /* Fill the FP regs. We do this always. We stop if we reach the end
4964 of the argument, the last FP register, or the last argument slot. */
4966 byte_size
= ((mode
== BLKmode
)
4967 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4968 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4970 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4971 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
4974 args_byte_size
+= hfa_size
;
4978 cum
->fp_regs
= fp_regs
;
4981 /* Integral and aggregates go in general registers. So do TFmode FP values.
4982 If we have run out of FR registers, then other FP values must also go in
4983 general registers. This can happen when we have a SFmode HFA. */
4984 else if (mode
== TFmode
|| mode
== TCmode
4985 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
4986 cum
->int_regs
= cum
->words
;
4988 /* If there is a prototype, then FP values go in a FR register when
4989 named, and in a GR register when unnamed. */
4990 else if (cum
->prototype
)
4993 cum
->int_regs
= cum
->words
;
4995 /* ??? Complex types should not reach here. */
4996 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
4998 /* If there is no prototype, then FP values go in both FR and GR
5002 /* ??? Complex types should not reach here. */
5003 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
5004 cum
->int_regs
= cum
->words
;
5008 /* Arguments with alignment larger than 8 bytes start at the next even
5009 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
5010 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5013 ia64_function_arg_boundary (machine_mode mode
, const_tree type
)
5015 if (mode
== TFmode
&& TARGET_HPUX
&& TARGET_ILP32
)
5016 return PARM_BOUNDARY
* 2;
5020 if (TYPE_ALIGN (type
) > PARM_BOUNDARY
)
5021 return PARM_BOUNDARY
* 2;
5023 return PARM_BOUNDARY
;
5026 if (GET_MODE_BITSIZE (mode
) > PARM_BOUNDARY
)
5027 return PARM_BOUNDARY
* 2;
5029 return PARM_BOUNDARY
;
5032 /* True if it is OK to do sibling call optimization for the specified
5033 call expression EXP. DECL will be the called function, or NULL if
5034 this is an indirect call. */
5036 ia64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5038 /* We can't perform a sibcall if the current function has the syscall_linkage
5040 if (lookup_attribute ("syscall_linkage",
5041 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
5044 /* We must always return with our current GP. This means we can
5045 only sibcall to functions defined in the current module unless
5046 TARGET_CONST_GP is set to true. */
5047 return (decl
&& (*targetm
.binds_local_p
) (decl
)) || TARGET_CONST_GP
;
5051 /* Implement va_arg. */
5054 ia64_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
5057 /* Variable sized types are passed by reference. */
5058 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
5060 tree ptrtype
= build_pointer_type (type
);
5061 tree addr
= std_gimplify_va_arg_expr (valist
, ptrtype
, pre_p
, post_p
);
5062 return build_va_arg_indirect_ref (addr
);
5065 /* Aggregate arguments with alignment larger than 8 bytes start at
5066 the next even boundary. Integer and floating point arguments
5067 do so if they are larger than 8 bytes, whether or not they are
5068 also aligned larger than 8 bytes. */
5069 if ((TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == INTEGER_TYPE
)
5070 ? int_size_in_bytes (type
) > 8 : TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
5072 tree t
= fold_build_pointer_plus_hwi (valist
, 2 * UNITS_PER_WORD
- 1);
5073 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5074 build_int_cst (TREE_TYPE (t
), -2 * UNITS_PER_WORD
));
5075 gimplify_assign (unshare_expr (valist
), t
, pre_p
);
5078 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
5081 /* Return 1 if function return value returned in memory. Return 0 if it is
5085 ia64_return_in_memory (const_tree valtype
, const_tree fntype ATTRIBUTE_UNUSED
)
5088 machine_mode hfa_mode
;
5089 HOST_WIDE_INT byte_size
;
5091 mode
= TYPE_MODE (valtype
);
5092 byte_size
= GET_MODE_SIZE (mode
);
5093 if (mode
== BLKmode
)
5095 byte_size
= int_size_in_bytes (valtype
);
5100 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5102 hfa_mode
= hfa_element_mode (valtype
, 0);
5103 if (hfa_mode
!= VOIDmode
)
5105 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
5107 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
5112 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
5118 /* Return rtx for register that holds the function return value. */
5121 ia64_function_value (const_tree valtype
,
5122 const_tree fn_decl_or_type
,
5123 bool outgoing ATTRIBUTE_UNUSED
)
5126 machine_mode hfa_mode
;
5128 const_tree func
= fn_decl_or_type
;
5131 && !DECL_P (fn_decl_or_type
))
5134 mode
= TYPE_MODE (valtype
);
5135 hfa_mode
= hfa_element_mode (valtype
, 0);
5137 if (hfa_mode
!= VOIDmode
)
5145 hfa_size
= GET_MODE_SIZE (hfa_mode
);
5146 byte_size
= ((mode
== BLKmode
)
5147 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
5149 for (i
= 0; offset
< byte_size
; i
++)
5151 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
5152 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
5156 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
5158 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
5159 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
5162 bool need_parallel
= false;
5164 /* In big-endian mode, we need to manage the layout of aggregates
5165 in the registers so that we get the bits properly aligned in
5166 the highpart of the registers. */
5167 if (BYTES_BIG_ENDIAN
5168 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
5169 need_parallel
= true;
5171 /* Something like struct S { long double x; char a[0] } is not an
5172 HFA structure, and therefore doesn't go in fp registers. But
5173 the middle-end will give it XFmode anyway, and XFmode values
5174 don't normally fit in integer registers. So we need to smuggle
5175 the value inside a parallel. */
5176 else if (mode
== XFmode
|| mode
== XCmode
|| mode
== RFmode
)
5177 need_parallel
= true;
5187 bytesize
= int_size_in_bytes (valtype
);
5188 /* An empty PARALLEL is invalid here, but the return value
5189 doesn't matter for empty structs. */
5191 return gen_rtx_REG (mode
, GR_RET_FIRST
);
5192 for (i
= 0; offset
< bytesize
; i
++)
5194 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
5195 gen_rtx_REG (DImode
,
5198 offset
+= UNITS_PER_WORD
;
5200 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
5203 mode
= promote_function_mode (valtype
, mode
, &unsignedp
,
5204 func
? TREE_TYPE (func
) : NULL_TREE
,
5207 return gen_rtx_REG (mode
, GR_RET_FIRST
);
5211 /* Worker function for TARGET_LIBCALL_VALUE. */
5214 ia64_libcall_value (machine_mode mode
,
5215 const_rtx fun ATTRIBUTE_UNUSED
)
5217 return gen_rtx_REG (mode
,
5218 (((GET_MODE_CLASS (mode
) == MODE_FLOAT
5219 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5220 && (mode
) != TFmode
)
5221 ? FR_RET_FIRST
: GR_RET_FIRST
));
5224 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5227 ia64_function_value_regno_p (const unsigned int regno
)
5229 return ((regno
>= GR_RET_FIRST
&& regno
<= GR_RET_LAST
)
5230 || (regno
>= FR_RET_FIRST
&& regno
<= FR_RET_LAST
));
5233 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5234 We need to emit DTP-relative relocations. */
5237 ia64_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
5239 gcc_assert (size
== 4 || size
== 8);
5241 fputs ("\tdata4.ua\t@dtprel(", file
);
5243 fputs ("\tdata8.ua\t@dtprel(", file
);
5244 output_addr_const (file
, x
);
5248 /* Print a memory address as an operand to reference that memory location. */
5250 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5251 also call this from ia64_print_operand for memory addresses. */
5254 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED
,
5255 machine_mode
/*mode*/,
5256 rtx address ATTRIBUTE_UNUSED
)
5260 /* Print an operand to an assembler instruction.
5261 C Swap and print a comparison operator.
5262 D Print an FP comparison operator.
5263 E Print 32 - constant, for SImode shifts as extract.
5264 e Print 64 - constant, for DImode rotates.
5265 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5266 a floating point register emitted normally.
5267 G A floating point constant.
5268 I Invert a predicate register by adding 1.
5269 J Select the proper predicate register for a condition.
5270 j Select the inverse predicate register for a condition.
5271 O Append .acq for volatile load.
5272 P Postincrement of a MEM.
5273 Q Append .rel for volatile store.
5274 R Print .s .d or nothing for a single, double or no truncation.
5275 S Shift amount for shladd instruction.
5276 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5277 for Intel assembler.
5278 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5279 for Intel assembler.
5280 X A pair of floating point registers.
5281 r Print register name, or constant 0 as r0. HP compatibility for
5283 v Print vector constant value as an 8-byte integer value. */
5286 ia64_print_operand (FILE * file
, rtx x
, int code
)
5293 /* Handled below. */
5298 enum rtx_code c
= swap_condition (GET_CODE (x
));
5299 fputs (GET_RTX_NAME (c
), file
);
5304 switch (GET_CODE (x
))
5331 str
= GET_RTX_NAME (GET_CODE (x
));
5338 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
5342 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
5346 if (x
== CONST0_RTX (GET_MODE (x
)))
5347 str
= reg_names
[FR_REG (0)];
5348 else if (x
== CONST1_RTX (GET_MODE (x
)))
5349 str
= reg_names
[FR_REG (1)];
5352 gcc_assert (GET_CODE (x
) == REG
);
5353 str
= reg_names
[REGNO (x
)];
5361 real_to_target (val
, CONST_DOUBLE_REAL_VALUE (x
), GET_MODE (x
));
5362 if (GET_MODE (x
) == SFmode
)
5363 fprintf (file
, "0x%08lx", val
[0] & 0xffffffff);
5364 else if (GET_MODE (x
) == DFmode
)
5365 fprintf (file
, "0x%08lx%08lx", (WORDS_BIG_ENDIAN
? val
[0] : val
[1])
5367 (WORDS_BIG_ENDIAN
? val
[1] : val
[0])
5370 output_operand_lossage ("invalid %%G mode");
5375 fputs (reg_names
[REGNO (x
) + 1], file
);
5381 unsigned int regno
= REGNO (XEXP (x
, 0));
5382 if (GET_CODE (x
) == EQ
)
5386 fputs (reg_names
[regno
], file
);
5391 if (MEM_VOLATILE_P (x
))
5392 fputs(".acq", file
);
5397 HOST_WIDE_INT value
;
5399 switch (GET_CODE (XEXP (x
, 0)))
5405 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5406 if (GET_CODE (x
) == CONST_INT
)
5410 gcc_assert (GET_CODE (x
) == REG
);
5411 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
5417 value
= GET_MODE_SIZE (GET_MODE (x
));
5421 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
5425 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
5430 if (MEM_VOLATILE_P (x
))
5431 fputs(".rel", file
);
5435 if (x
== CONST0_RTX (GET_MODE (x
)))
5437 else if (x
== CONST1_RTX (GET_MODE (x
)))
5439 else if (x
== CONST2_RTX (GET_MODE (x
)))
5442 output_operand_lossage ("invalid %%R value");
5446 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5450 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5452 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
5458 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5460 const char *prefix
= "0x";
5461 if (INTVAL (x
) & 0x80000000)
5463 fprintf (file
, "0xffffffff");
5466 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
5473 unsigned int regno
= REGNO (x
);
5474 fprintf (file
, "%s, %s", reg_names
[regno
], reg_names
[regno
+ 1]);
5479 /* If this operand is the constant zero, write it as register zero.
5480 Any register, zero, or CONST_INT value is OK here. */
5481 if (GET_CODE (x
) == REG
)
5482 fputs (reg_names
[REGNO (x
)], file
);
5483 else if (x
== CONST0_RTX (GET_MODE (x
)))
5485 else if (GET_CODE (x
) == CONST_INT
)
5486 output_addr_const (file
, x
);
5488 output_operand_lossage ("invalid %%r value");
5492 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
5493 x
= simplify_subreg (DImode
, x
, GET_MODE (x
), 0);
5500 /* For conditional branches, returns or calls, substitute
5501 sptk, dptk, dpnt, or spnt for %s. */
5502 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5505 int pred_val
= XINT (x
, 0);
5507 /* Guess top and bottom 10% statically predicted. */
5508 if (pred_val
< REG_BR_PROB_BASE
/ 50
5509 && br_prob_note_reliable_p (x
))
5511 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
5513 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98
5514 || !br_prob_note_reliable_p (x
))
5519 else if (CALL_P (current_output_insn
))
5524 fputs (which
, file
);
5529 x
= current_insn_predicate
;
5532 unsigned int regno
= REGNO (XEXP (x
, 0));
5533 if (GET_CODE (x
) == EQ
)
5535 fprintf (file
, "(%s) ", reg_names
[regno
]);
5540 output_operand_lossage ("ia64_print_operand: unknown code");
5544 switch (GET_CODE (x
))
5546 /* This happens for the spill/restore instructions. */
5554 fputs (reg_names
[REGNO (x
)], file
);
5559 rtx addr
= XEXP (x
, 0);
5560 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
5561 addr
= XEXP (addr
, 0);
5562 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
5567 output_addr_const (file
, x
);
5574 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5577 ia64_print_operand_punct_valid_p (unsigned char code
)
5579 return (code
== '+' || code
== ',');
5582 /* Compute a (partial) cost for rtx X. Return true if the complete
5583 cost has been computed, and false if subexpressions should be
5584 scanned. In either case, *TOTAL contains the cost result. */
5585 /* ??? This is incomplete. */
5588 ia64_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
5589 int opno ATTRIBUTE_UNUSED
,
5590 int *total
, bool speed ATTRIBUTE_UNUSED
)
5592 int code
= GET_CODE (x
);
5600 *total
= satisfies_constraint_J (x
) ? 0 : COSTS_N_INSNS (1);
5603 if (satisfies_constraint_I (x
))
5605 else if (satisfies_constraint_J (x
))
5608 *total
= COSTS_N_INSNS (1);
5611 if (satisfies_constraint_K (x
) || satisfies_constraint_L (x
))
5614 *total
= COSTS_N_INSNS (1);
5619 *total
= COSTS_N_INSNS (1);
5625 *total
= COSTS_N_INSNS (3);
5629 *total
= COSTS_N_INSNS (4);
5633 /* For multiplies wider than HImode, we have to go to the FPU,
5634 which normally involves copies. Plus there's the latency
5635 of the multiply itself, and the latency of the instructions to
5636 transfer integer regs to FP regs. */
5637 if (FLOAT_MODE_P (mode
))
5638 *total
= COSTS_N_INSNS (4);
5639 else if (GET_MODE_SIZE (mode
) > 2)
5640 *total
= COSTS_N_INSNS (10);
5642 *total
= COSTS_N_INSNS (2);
5647 if (FLOAT_MODE_P (mode
))
5649 *total
= COSTS_N_INSNS (4);
5657 *total
= COSTS_N_INSNS (1);
5664 /* We make divide expensive, so that divide-by-constant will be
5665 optimized to a multiply. */
5666 *total
= COSTS_N_INSNS (60);
5674 /* Calculate the cost of moving data from a register in class FROM to
5675 one in class TO, using MODE. */
5678 ia64_register_move_cost (machine_mode mode
, reg_class_t from
,
5681 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5682 if (to
== ADDL_REGS
)
5684 if (from
== ADDL_REGS
)
5687 /* All costs are symmetric, so reduce cases by putting the
5688 lower number class as the destination. */
5691 reg_class_t tmp
= to
;
5692 to
= from
, from
= tmp
;
5695 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5696 so that we get secondary memory reloads. Between FR_REGS,
5697 we have to make this at least as expensive as memory_move_cost
5698 to avoid spectacularly poor register class preferencing. */
5699 if (mode
== XFmode
|| mode
== RFmode
)
5701 if (to
!= GR_REGS
|| from
!= GR_REGS
)
5702 return memory_move_cost (mode
, to
, false);
5710 /* Moving between PR registers takes two insns. */
5711 if (from
== PR_REGS
)
5713 /* Moving between PR and anything but GR is impossible. */
5714 if (from
!= GR_REGS
)
5715 return memory_move_cost (mode
, to
, false);
5719 /* Moving between BR and anything but GR is impossible. */
5720 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
5721 return memory_move_cost (mode
, to
, false);
5726 /* Moving between AR and anything but GR is impossible. */
5727 if (from
!= GR_REGS
)
5728 return memory_move_cost (mode
, to
, false);
5734 case GR_AND_FR_REGS
:
5735 case GR_AND_BR_REGS
:
5746 /* Calculate the cost of moving data of MODE from a register to or from
5750 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
5752 bool in ATTRIBUTE_UNUSED
)
5754 if (rclass
== GENERAL_REGS
5755 || rclass
== FR_REGS
5756 || rclass
== FP_REGS
5757 || rclass
== GR_AND_FR_REGS
)
5763 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5764 on RCLASS to use when copying X into that class. */
5767 ia64_preferred_reload_class (rtx x
, reg_class_t rclass
)
5773 /* Don't allow volatile mem reloads into floating point registers.
5774 This is defined to force reload to choose the r/m case instead
5775 of the f/f case when reloading (set (reg fX) (mem/v)). */
5776 if (MEM_P (x
) && MEM_VOLATILE_P (x
))
5779 /* Force all unrecognized constants into the constant pool. */
5797 /* This function returns the register class required for a secondary
5798 register when copying between one of the registers in RCLASS, and X,
5799 using MODE. A return value of NO_REGS means that no secondary register
5803 ia64_secondary_reload_class (enum reg_class rclass
,
5804 machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
5808 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
5809 regno
= true_regnum (x
);
5816 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5817 interaction. We end up with two pseudos with overlapping lifetimes
5818 both of which are equiv to the same constant, and both which need
5819 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5820 changes depending on the path length, which means the qty_first_reg
5821 check in make_regs_eqv can give different answers at different times.
5822 At some point I'll probably need a reload_indi pattern to handle
5825 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5826 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5827 non-general registers for good measure. */
5828 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
5831 /* This is needed if a pseudo used as a call_operand gets spilled to a
5833 if (GET_CODE (x
) == MEM
)
5839 /* Need to go through general registers to get to other class regs. */
5840 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
5843 /* This can happen when a paradoxical subreg is an operand to the
5845 /* ??? This shouldn't be necessary after instruction scheduling is
5846 enabled, because paradoxical subregs are not accepted by
5847 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5848 stop the paradoxical subreg stupidity in the *_operand functions
5850 if (GET_CODE (x
) == MEM
5851 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
5852 || GET_MODE (x
) == QImode
))
5855 /* This can happen because of the ior/and/etc patterns that accept FP
5856 registers as operands. If the third operand is a constant, then it
5857 needs to be reloaded into a FP register. */
5858 if (GET_CODE (x
) == CONST_INT
)
5861 /* This can happen because of register elimination in a muldi3 insn.
5862 E.g. `26107 * (unsigned long)&u'. */
5863 if (GET_CODE (x
) == PLUS
)
5868 /* ??? This happens if we cse/gcse a BImode value across a call,
5869 and the function has a nonlocal goto. This is because global
5870 does not allocate call crossing pseudos to hard registers when
5871 crtl->has_nonlocal_goto is true. This is relatively
5872 common for C++ programs that use exceptions. To reproduce,
5873 return NO_REGS and compile libstdc++. */
5874 if (GET_CODE (x
) == MEM
)
5877 /* This can happen when we take a BImode subreg of a DImode value,
5878 and that DImode value winds up in some non-GR register. */
5879 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
5891 /* Implement targetm.unspec_may_trap_p hook. */
5893 ia64_unspec_may_trap_p (const_rtx x
, unsigned flags
)
5895 switch (XINT (x
, 1))
5901 case UNSPEC_CHKACLR
:
5903 /* These unspecs are just wrappers. */
5904 return may_trap_p_1 (XVECEXP (x
, 0, 0), flags
);
5907 return default_unspec_may_trap_p (x
, flags
);
5911 /* Parse the -mfixed-range= option string. */
5914 fix_range (const char *const_str
)
5917 char *str
, *dash
, *comma
;
5919 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5920 REG2 are either register names or register numbers. The effect
5921 of this option is to mark the registers in the range from REG1 to
5922 REG2 as ``fixed'' so they won't be used by the compiler. This is
5923 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
5925 i
= strlen (const_str
);
5926 str
= (char *) alloca (i
+ 1);
5927 memcpy (str
, const_str
, i
+ 1);
5931 dash
= strchr (str
, '-');
5934 warning (0, "value of -mfixed-range must have form REG1-REG2");
5939 comma
= strchr (dash
+ 1, ',');
5943 first
= decode_reg_name (str
);
5946 warning (0, "unknown register name: %s", str
);
5950 last
= decode_reg_name (dash
+ 1);
5953 warning (0, "unknown register name: %s", dash
+ 1);
5961 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
5965 for (i
= first
; i
<= last
; ++i
)
5966 fixed_regs
[i
] = call_used_regs
[i
] = 1;
5976 /* Implement TARGET_OPTION_OVERRIDE. */
5979 ia64_option_override (void)
5982 cl_deferred_option
*opt
;
5983 vec
<cl_deferred_option
> *v
5984 = (vec
<cl_deferred_option
> *) ia64_deferred_options
;
5987 FOR_EACH_VEC_ELT (*v
, i
, opt
)
5989 switch (opt
->opt_index
)
5991 case OPT_mfixed_range_
:
5992 fix_range (opt
->arg
);
6000 if (TARGET_AUTO_PIC
)
6001 target_flags
|= MASK_CONST_GP
;
6003 /* Numerous experiments show that IRA based loop pressure
6004 calculation works better for RTL loop invariant motion on targets
6005 with enough (>= 32) registers. It is an expensive optimization.
6006 So it is on only for peak performance. */
6008 flag_ira_loop_pressure
= 1;
6011 ia64_section_threshold
= (global_options_set
.x_g_switch_value
6013 : IA64_DEFAULT_GVALUE
);
6015 init_machine_status
= ia64_init_machine_status
;
6017 if (align_functions
<= 0)
6018 align_functions
= 64;
6019 if (align_loops
<= 0)
6021 if (TARGET_ABI_OPEN_VMS
)
6024 ia64_override_options_after_change();
6027 /* Implement targetm.override_options_after_change. */
6030 ia64_override_options_after_change (void)
6033 && !global_options_set
.x_flag_selective_scheduling
6034 && !global_options_set
.x_flag_selective_scheduling2
)
6036 flag_selective_scheduling2
= 1;
6037 flag_sel_sched_pipelining
= 1;
6039 if (mflag_sched_control_spec
== 2)
6041 /* Control speculation is on by default for the selective scheduler,
6042 but not for the Haifa scheduler. */
6043 mflag_sched_control_spec
= flag_selective_scheduling2
? 1 : 0;
6045 if (flag_sel_sched_pipelining
&& flag_auto_inc_dec
)
6047 /* FIXME: remove this when we'd implement breaking autoinsns as
6048 a transformation. */
6049 flag_auto_inc_dec
= 0;
6053 /* Initialize the record of emitted frame related registers. */
6055 void ia64_init_expanders (void)
6057 memset (&emitted_frame_related_regs
, 0, sizeof (emitted_frame_related_regs
));
6060 static struct machine_function
*
6061 ia64_init_machine_status (void)
6063 return ggc_cleared_alloc
<machine_function
> ();
6066 static enum attr_itanium_class
ia64_safe_itanium_class (rtx_insn
*);
6067 static enum attr_type
ia64_safe_type (rtx_insn
*);
6069 static enum attr_itanium_class
6070 ia64_safe_itanium_class (rtx_insn
*insn
)
6072 if (recog_memoized (insn
) >= 0)
6073 return get_attr_itanium_class (insn
);
6074 else if (DEBUG_INSN_P (insn
))
6075 return ITANIUM_CLASS_IGNORE
;
6077 return ITANIUM_CLASS_UNKNOWN
;
6080 static enum attr_type
6081 ia64_safe_type (rtx_insn
*insn
)
6083 if (recog_memoized (insn
) >= 0)
6084 return get_attr_type (insn
);
6086 return TYPE_UNKNOWN
;
6089 /* The following collection of routines emit instruction group stop bits as
6090 necessary to avoid dependencies. */
6092 /* Need to track some additional registers as far as serialization is
6093 concerned so we can properly handle br.call and br.ret. We could
6094 make these registers visible to gcc, but since these registers are
6095 never explicitly used in gcc generated code, it seems wasteful to
6096 do so (plus it would make the call and return patterns needlessly
6098 #define REG_RP (BR_REG (0))
6099 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6100 /* This is used for volatile asms which may require a stop bit immediately
6101 before and after them. */
6102 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6103 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6104 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6106 /* For each register, we keep track of how it has been written in the
6107 current instruction group.
6109 If a register is written unconditionally (no qualifying predicate),
6110 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6112 If a register is written if its qualifying predicate P is true, we
6113 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6114 may be written again by the complement of P (P^1) and when this happens,
6115 WRITE_COUNT gets set to 2.
6117 The result of this is that whenever an insn attempts to write a register
6118 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6120 If a predicate register is written by a floating-point insn, we set
6121 WRITTEN_BY_FP to true.
6123 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6124 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6126 #if GCC_VERSION >= 4000
6127 #define RWS_FIELD_TYPE __extension__ unsigned short
6129 #define RWS_FIELD_TYPE unsigned int
6131 struct reg_write_state
6133 RWS_FIELD_TYPE write_count
: 2;
6134 RWS_FIELD_TYPE first_pred
: 10;
6135 RWS_FIELD_TYPE written_by_fp
: 1;
6136 RWS_FIELD_TYPE written_by_and
: 1;
6137 RWS_FIELD_TYPE written_by_or
: 1;
6140 /* Cumulative info for the current instruction group. */
6141 struct reg_write_state rws_sum
[NUM_REGS
];
6143 /* Bitmap whether a register has been written in the current insn. */
6144 HARD_REG_ELT_TYPE rws_insn
[(NUM_REGS
+ HOST_BITS_PER_WIDEST_FAST_INT
- 1)
6145 / HOST_BITS_PER_WIDEST_FAST_INT
];
6148 rws_insn_set (int regno
)
6150 gcc_assert (!TEST_HARD_REG_BIT (rws_insn
, regno
));
6151 SET_HARD_REG_BIT (rws_insn
, regno
);
6155 rws_insn_test (int regno
)
6157 return TEST_HARD_REG_BIT (rws_insn
, regno
);
6160 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6161 unsigned char rws_insn
[2];
6164 rws_insn_set (int regno
)
6166 if (regno
== REG_AR_CFM
)
6168 else if (regno
== REG_VOLATILE
)
6173 rws_insn_test (int regno
)
6175 if (regno
== REG_AR_CFM
)
6177 if (regno
== REG_VOLATILE
)
6183 /* Indicates whether this is the first instruction after a stop bit,
6184 in which case we don't need another stop bit. Without this,
6185 ia64_variable_issue will die when scheduling an alloc. */
6186 static int first_instruction
;
6188 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6189 RTL for one instruction. */
6192 unsigned int is_write
: 1; /* Is register being written? */
6193 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
6194 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
6195 unsigned int is_and
: 1; /* Is register used as part of and.orcm? */
6196 unsigned int is_or
: 1; /* Is register used as part of or.andcm? */
6197 unsigned int is_sibcall
: 1; /* Is this a sibling or normal call? */
6200 static void rws_update (int, struct reg_flags
, int);
6201 static int rws_access_regno (int, struct reg_flags
, int);
6202 static int rws_access_reg (rtx
, struct reg_flags
, int);
6203 static void update_set_flags (rtx
, struct reg_flags
*);
6204 static int set_src_needs_barrier (rtx
, struct reg_flags
, int);
6205 static int rtx_needs_barrier (rtx
, struct reg_flags
, int);
6206 static void init_insn_group_barriers (void);
6207 static int group_barrier_needed (rtx_insn
*);
6208 static int safe_group_barrier_needed (rtx_insn
*);
6209 static int in_safe_group_barrier
;
6211 /* Update *RWS for REGNO, which is being written by the current instruction,
6212 with predicate PRED, and associated register flags in FLAGS. */
6215 rws_update (int regno
, struct reg_flags flags
, int pred
)
6218 rws_sum
[regno
].write_count
++;
6220 rws_sum
[regno
].write_count
= 2;
6221 rws_sum
[regno
].written_by_fp
|= flags
.is_fp
;
6222 /* ??? Not tracking and/or across differing predicates. */
6223 rws_sum
[regno
].written_by_and
= flags
.is_and
;
6224 rws_sum
[regno
].written_by_or
= flags
.is_or
;
6225 rws_sum
[regno
].first_pred
= pred
;
6228 /* Handle an access to register REGNO of type FLAGS using predicate register
6229 PRED. Update rws_sum array. Return 1 if this access creates
6230 a dependency with an earlier instruction in the same group. */
6233 rws_access_regno (int regno
, struct reg_flags flags
, int pred
)
6235 int need_barrier
= 0;
6237 gcc_assert (regno
< NUM_REGS
);
6239 if (! PR_REGNO_P (regno
))
6240 flags
.is_and
= flags
.is_or
= 0;
6246 rws_insn_set (regno
);
6247 write_count
= rws_sum
[regno
].write_count
;
6249 switch (write_count
)
6252 /* The register has not been written yet. */
6253 if (!in_safe_group_barrier
)
6254 rws_update (regno
, flags
, pred
);
6258 /* The register has been written via a predicate. Treat
6259 it like a unconditional write and do not try to check
6260 for complementary pred reg in earlier write. */
6261 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
6263 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
6267 if (!in_safe_group_barrier
)
6268 rws_update (regno
, flags
, pred
);
6272 /* The register has been unconditionally written already. We
6274 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
6276 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
6280 if (!in_safe_group_barrier
)
6282 rws_sum
[regno
].written_by_and
= flags
.is_and
;
6283 rws_sum
[regno
].written_by_or
= flags
.is_or
;
6293 if (flags
.is_branch
)
6295 /* Branches have several RAW exceptions that allow to avoid
6298 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
6299 /* RAW dependencies on branch regs are permissible as long
6300 as the writer is a non-branch instruction. Since we
6301 never generate code that uses a branch register written
6302 by a branch instruction, handling this case is
6306 if (REGNO_REG_CLASS (regno
) == PR_REGS
6307 && ! rws_sum
[regno
].written_by_fp
)
6308 /* The predicates of a branch are available within the
6309 same insn group as long as the predicate was written by
6310 something other than a floating-point instruction. */
6314 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
6316 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
6319 switch (rws_sum
[regno
].write_count
)
6322 /* The register has not been written yet. */
6326 /* The register has been written via a predicate, assume we
6327 need a barrier (don't check for complementary regs). */
6332 /* The register has been unconditionally written already. We
6342 return need_barrier
;
6346 rws_access_reg (rtx reg
, struct reg_flags flags
, int pred
)
6348 int regno
= REGNO (reg
);
6349 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
6352 return rws_access_regno (regno
, flags
, pred
);
6355 int need_barrier
= 0;
6357 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
6358 return need_barrier
;
6362 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
6363 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6366 update_set_flags (rtx x
, struct reg_flags
*pflags
)
6368 rtx src
= SET_SRC (x
);
6370 switch (GET_CODE (src
))
6376 /* There are four cases here:
6377 (1) The destination is (pc), in which case this is a branch,
6378 nothing here applies.
6379 (2) The destination is ar.lc, in which case this is a
6380 doloop_end_internal,
6381 (3) The destination is an fp register, in which case this is
6382 an fselect instruction.
6383 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6384 this is a check load.
6385 In all cases, nothing we do in this function applies. */
6389 if (COMPARISON_P (src
)
6390 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src
, 0))))
6391 /* Set pflags->is_fp to 1 so that we know we're dealing
6392 with a floating point comparison when processing the
6393 destination of the SET. */
6396 /* Discover if this is a parallel comparison. We only handle
6397 and.orcm and or.andcm at present, since we must retain a
6398 strict inverse on the predicate pair. */
6399 else if (GET_CODE (src
) == AND
)
6401 else if (GET_CODE (src
) == IOR
)
6408 /* Subroutine of rtx_needs_barrier; this function determines whether the
6409 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6410 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6414 set_src_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
6416 int need_barrier
= 0;
6418 rtx src
= SET_SRC (x
);
6420 if (GET_CODE (src
) == CALL
)
6421 /* We don't need to worry about the result registers that
6422 get written by subroutine call. */
6423 return rtx_needs_barrier (src
, flags
, pred
);
6424 else if (SET_DEST (x
) == pc_rtx
)
6426 /* X is a conditional branch. */
6427 /* ??? This seems redundant, as the caller sets this bit for
6429 if (!ia64_spec_check_src_p (src
))
6430 flags
.is_branch
= 1;
6431 return rtx_needs_barrier (src
, flags
, pred
);
6434 if (ia64_spec_check_src_p (src
))
6435 /* Avoid checking one register twice (in condition
6436 and in 'then' section) for ldc pattern. */
6438 gcc_assert (REG_P (XEXP (src
, 2)));
6439 need_barrier
= rtx_needs_barrier (XEXP (src
, 2), flags
, pred
);
6441 /* We process MEM below. */
6442 src
= XEXP (src
, 1);
6445 need_barrier
|= rtx_needs_barrier (src
, flags
, pred
);
6448 if (GET_CODE (dst
) == ZERO_EXTRACT
)
6450 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
6451 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
6453 return need_barrier
;
6456 /* Handle an access to rtx X of type FLAGS using predicate register
6457 PRED. Return 1 if this access creates a dependency with an earlier
6458 instruction in the same group. */
6461 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
6464 int is_complemented
= 0;
6465 int need_barrier
= 0;
6466 const char *format_ptr
;
6467 struct reg_flags new_flags
;
6475 switch (GET_CODE (x
))
6478 update_set_flags (x
, &new_flags
);
6479 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
);
6480 if (GET_CODE (SET_SRC (x
)) != CALL
)
6482 new_flags
.is_write
= 1;
6483 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
6488 new_flags
.is_write
= 0;
6489 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6491 /* Avoid multiple register writes, in case this is a pattern with
6492 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6493 if (! flags
.is_sibcall
&& ! rws_insn_test (REG_AR_CFM
))
6495 new_flags
.is_write
= 1;
6496 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
6497 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
6498 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6503 /* X is a predicated instruction. */
6505 cond
= COND_EXEC_TEST (x
);
6507 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
6509 if (GET_CODE (cond
) == EQ
)
6510 is_complemented
= 1;
6511 cond
= XEXP (cond
, 0);
6512 gcc_assert (GET_CODE (cond
) == REG
6513 && REGNO_REG_CLASS (REGNO (cond
)) == PR_REGS
);
6514 pred
= REGNO (cond
);
6515 if (is_complemented
)
6518 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
6519 return need_barrier
;
6523 /* Clobber & use are for earlier compiler-phases only. */
6528 /* We always emit stop bits for traditional asms. We emit stop bits
6529 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6530 if (GET_CODE (x
) != ASM_OPERANDS
6531 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
6533 /* Avoid writing the register multiple times if we have multiple
6534 asm outputs. This avoids a failure in rws_access_reg. */
6535 if (! rws_insn_test (REG_VOLATILE
))
6537 new_flags
.is_write
= 1;
6538 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
6543 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6544 We cannot just fall through here since then we would be confused
6545 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6546 traditional asms unlike their normal usage. */
6548 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
6549 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
6554 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6556 rtx pat
= XVECEXP (x
, 0, i
);
6557 switch (GET_CODE (pat
))
6560 update_set_flags (pat
, &new_flags
);
6561 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
);
6568 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6572 if (REG_P (XEXP (pat
, 0))
6573 && extract_asm_operands (x
) != NULL_RTX
6574 && REGNO (XEXP (pat
, 0)) != AR_UNAT_REGNUM
)
6576 new_flags
.is_write
= 1;
6577 need_barrier
|= rtx_needs_barrier (XEXP (pat
, 0),
6590 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
6592 rtx pat
= XVECEXP (x
, 0, i
);
6593 if (GET_CODE (pat
) == SET
)
6595 if (GET_CODE (SET_SRC (pat
)) != CALL
)
6597 new_flags
.is_write
= 1;
6598 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
6602 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
6603 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
6608 need_barrier
|= rtx_needs_barrier (SUBREG_REG (x
), flags
, pred
);
6611 if (REGNO (x
) == AR_UNAT_REGNUM
)
6613 for (i
= 0; i
< 64; ++i
)
6614 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
6617 need_barrier
= rws_access_reg (x
, flags
, pred
);
6621 /* Find the regs used in memory address computation. */
6622 new_flags
.is_write
= 0;
6623 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6626 case CONST_INT
: case CONST_DOUBLE
: case CONST_VECTOR
:
6627 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
6630 /* Operators with side-effects. */
6631 case POST_INC
: case POST_DEC
:
6632 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6634 new_flags
.is_write
= 0;
6635 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6636 new_flags
.is_write
= 1;
6637 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6641 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
6643 new_flags
.is_write
= 0;
6644 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6645 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6646 new_flags
.is_write
= 1;
6647 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
6650 /* Handle common unary and binary ops for efficiency. */
6651 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
6652 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
6653 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
6654 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
6655 case NE
: case EQ
: case GE
: case GT
: case LE
:
6656 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
6657 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
6658 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
6661 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
6662 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
6663 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
6664 case SQRT
: case FFS
: case POPCOUNT
:
6665 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6669 /* VEC_SELECT's second argument is a PARALLEL with integers that
6670 describe the elements selected. On ia64, those integers are
6671 always constants. Avoid walking the PARALLEL so that we don't
6672 get confused with "normal" parallels and then die. */
6673 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
6677 switch (XINT (x
, 1))
6679 case UNSPEC_LTOFF_DTPMOD
:
6680 case UNSPEC_LTOFF_DTPREL
:
6682 case UNSPEC_LTOFF_TPREL
:
6684 case UNSPEC_PRED_REL_MUTEX
:
6685 case UNSPEC_PIC_CALL
:
6687 case UNSPEC_FETCHADD_ACQ
:
6688 case UNSPEC_FETCHADD_REL
:
6689 case UNSPEC_BSP_VALUE
:
6690 case UNSPEC_FLUSHRS
:
6691 case UNSPEC_BUNDLE_SELECTOR
:
6694 case UNSPEC_GR_SPILL
:
6695 case UNSPEC_GR_RESTORE
:
6697 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
6698 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
6700 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6701 new_flags
.is_write
= (XINT (x
, 1) == UNSPEC_GR_SPILL
);
6702 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
6707 case UNSPEC_FR_SPILL
:
6708 case UNSPEC_FR_RESTORE
:
6709 case UNSPEC_GETF_EXP
:
6710 case UNSPEC_SETF_EXP
:
6712 case UNSPEC_FR_SQRT_RECIP_APPROX
:
6713 case UNSPEC_FR_SQRT_RECIP_APPROX_RES
:
6718 case UNSPEC_CHKACLR
:
6720 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6723 case UNSPEC_FR_RECIP_APPROX
:
6725 case UNSPEC_COPYSIGN
:
6726 case UNSPEC_FR_RECIP_APPROX_RES
:
6727 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6728 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6731 case UNSPEC_CMPXCHG_ACQ
:
6732 case UNSPEC_CMPXCHG_REL
:
6733 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
6734 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
6742 case UNSPEC_VOLATILE
:
6743 switch (XINT (x
, 1))
6746 /* Alloc must always be the first instruction of a group.
6747 We force this by always returning true. */
6748 /* ??? We might get better scheduling if we explicitly check for
6749 input/local/output register dependencies, and modify the
6750 scheduler so that alloc is always reordered to the start of
6751 the current group. We could then eliminate all of the
6752 first_instruction code. */
6753 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6755 new_flags
.is_write
= 1;
6756 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6759 case UNSPECV_SET_BSP
:
6760 case UNSPECV_PROBE_STACK_RANGE
:
6764 case UNSPECV_BLOCKAGE
:
6765 case UNSPECV_INSN_GROUP_BARRIER
:
6767 case UNSPECV_PSAC_ALL
:
6768 case UNSPECV_PSAC_NORMAL
:
6771 case UNSPECV_PROBE_STACK_ADDRESS
:
6772 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
6781 new_flags
.is_write
= 0;
6782 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
6783 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
6785 new_flags
.is_write
= 1;
6786 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
6787 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
6791 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
6792 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6793 switch (format_ptr
[i
])
6795 case '0': /* unused field */
6796 case 'i': /* integer */
6797 case 'n': /* note */
6798 case 'w': /* wide integer */
6799 case 's': /* pointer to string */
6800 case 'S': /* optional pointer to string */
6804 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
6809 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
6810 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
6819 return need_barrier
;
6822 /* Clear out the state for group_barrier_needed at the start of a
6823 sequence of insns. */
6826 init_insn_group_barriers (void)
6828 memset (rws_sum
, 0, sizeof (rws_sum
));
6829 first_instruction
= 1;
6832 /* Given the current state, determine whether a group barrier (a stop bit) is
6833 necessary before INSN. Return nonzero if so. This modifies the state to
6834 include the effects of INSN as a side-effect. */
6837 group_barrier_needed (rtx_insn
*insn
)
6840 int need_barrier
= 0;
6841 struct reg_flags flags
;
6843 memset (&flags
, 0, sizeof (flags
));
6844 switch (GET_CODE (insn
))
6851 /* A barrier doesn't imply an instruction group boundary. */
6855 memset (rws_insn
, 0, sizeof (rws_insn
));
6859 flags
.is_branch
= 1;
6860 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
6861 memset (rws_insn
, 0, sizeof (rws_insn
));
6863 /* Don't bundle a call following another call. */
6864 if ((pat
= prev_active_insn (insn
)) && CALL_P (pat
))
6870 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
6874 if (!ia64_spec_check_p (insn
))
6875 flags
.is_branch
= 1;
6877 /* Don't bundle a jump following a call. */
6878 if ((pat
= prev_active_insn (insn
)) && CALL_P (pat
))
6886 if (GET_CODE (PATTERN (insn
)) == USE
6887 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6888 /* Don't care about USE and CLOBBER "insns"---those are used to
6889 indicate to the optimizer that it shouldn't get rid of
6890 certain operations. */
6893 pat
= PATTERN (insn
);
6895 /* Ug. Hack hacks hacked elsewhere. */
6896 switch (recog_memoized (insn
))
6898 /* We play dependency tricks with the epilogue in order
6899 to get proper schedules. Undo this for dv analysis. */
6900 case CODE_FOR_epilogue_deallocate_stack
:
6901 case CODE_FOR_prologue_allocate_stack
:
6902 pat
= XVECEXP (pat
, 0, 0);
6905 /* The pattern we use for br.cloop confuses the code above.
6906 The second element of the vector is representative. */
6907 case CODE_FOR_doloop_end_internal
:
6908 pat
= XVECEXP (pat
, 0, 1);
6911 /* Doesn't generate code. */
6912 case CODE_FOR_pred_rel_mutex
:
6913 case CODE_FOR_prologue_use
:
6920 memset (rws_insn
, 0, sizeof (rws_insn
));
6921 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
6923 /* Check to see if the previous instruction was a volatile
6926 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
6934 if (first_instruction
&& important_for_bundling_p (insn
))
6937 first_instruction
= 0;
6940 return need_barrier
;
6943 /* Like group_barrier_needed, but do not clobber the current state. */
6946 safe_group_barrier_needed (rtx_insn
*insn
)
6948 int saved_first_instruction
;
6951 saved_first_instruction
= first_instruction
;
6952 in_safe_group_barrier
= 1;
6954 t
= group_barrier_needed (insn
);
6956 first_instruction
= saved_first_instruction
;
6957 in_safe_group_barrier
= 0;
6962 /* Scan the current function and insert stop bits as necessary to
6963 eliminate dependencies. This function assumes that a final
6964 instruction scheduling pass has been run which has already
6965 inserted most of the necessary stop bits. This function only
6966 inserts new ones at basic block boundaries, since these are
6967 invisible to the scheduler. */
6970 emit_insn_group_barriers (FILE *dump
)
6973 rtx_insn
*last_label
= 0;
6974 int insns_since_last_label
= 0;
6976 init_insn_group_barriers ();
6978 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6982 if (insns_since_last_label
)
6984 insns_since_last_label
= 0;
6986 else if (NOTE_P (insn
)
6987 && NOTE_KIND (insn
) == NOTE_INSN_BASIC_BLOCK
)
6989 if (insns_since_last_label
)
6991 insns_since_last_label
= 0;
6993 else if (NONJUMP_INSN_P (insn
)
6994 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
6995 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
6997 init_insn_group_barriers ();
7000 else if (NONDEBUG_INSN_P (insn
))
7002 insns_since_last_label
= 1;
7004 if (group_barrier_needed (insn
))
7009 fprintf (dump
, "Emitting stop before label %d\n",
7010 INSN_UID (last_label
));
7011 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
7014 init_insn_group_barriers ();
7022 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7023 This function has to emit all necessary group barriers. */
7026 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
7030 init_insn_group_barriers ();
7032 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7034 if (BARRIER_P (insn
))
7036 rtx_insn
*last
= prev_active_insn (insn
);
7040 if (JUMP_TABLE_DATA_P (last
))
7041 last
= prev_active_insn (last
);
7042 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
7043 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
7045 init_insn_group_barriers ();
7047 else if (NONDEBUG_INSN_P (insn
))
7049 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
7050 init_insn_group_barriers ();
7051 else if (group_barrier_needed (insn
))
7053 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
7054 init_insn_group_barriers ();
7055 group_barrier_needed (insn
);
7063 /* Instruction scheduling support. */
7065 #define NR_BUNDLES 10
7067 /* A list of names of all available bundles. */
7069 static const char *bundle_name
[NR_BUNDLES
] =
7075 #if NR_BUNDLES == 10
7085 /* Nonzero if we should insert stop bits into the schedule. */
7087 int ia64_final_schedule
= 0;
7089 /* Codes of the corresponding queried units: */
7091 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
7092 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
7094 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
7095 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
7097 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
7099 /* The following variable value is an insn group barrier. */
7101 static rtx_insn
*dfa_stop_insn
;
7103 /* The following variable value is the last issued insn. */
7105 static rtx_insn
*last_scheduled_insn
;
7107 /* The following variable value is pointer to a DFA state used as
7108 temporary variable. */
7110 static state_t temp_dfa_state
= NULL
;
7112 /* The following variable value is DFA state after issuing the last
7115 static state_t prev_cycle_state
= NULL
;
7117 /* The following array element values are TRUE if the corresponding
7118 insn requires to add stop bits before it. */
7120 static char *stops_p
= NULL
;
7122 /* The following variable is used to set up the mentioned above array. */
7124 static int stop_before_p
= 0;
7126 /* The following variable value is length of the arrays `clocks' and
7129 static int clocks_length
;
7131 /* The following variable value is number of data speculations in progress. */
7132 static int pending_data_specs
= 0;
7134 /* Number of memory references on current and three future processor cycles. */
7135 static char mem_ops_in_group
[4];
7137 /* Number of current processor cycle (from scheduler's point of view). */
7138 static int current_cycle
;
7140 static rtx
ia64_single_set (rtx_insn
*);
7141 static void ia64_emit_insn_before (rtx
, rtx
);
7143 /* Map a bundle number to its pseudo-op. */
7146 get_bundle_name (int b
)
7148 return bundle_name
[b
];
7152 /* Return the maximum number of instructions a cpu can issue. */
7155 ia64_issue_rate (void)
7160 /* Helper function - like single_set, but look inside COND_EXEC. */
7163 ia64_single_set (rtx_insn
*insn
)
7165 rtx x
= PATTERN (insn
), ret
;
7166 if (GET_CODE (x
) == COND_EXEC
)
7167 x
= COND_EXEC_CODE (x
);
7168 if (GET_CODE (x
) == SET
)
7171 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7172 Although they are not classical single set, the second set is there just
7173 to protect it from moving past FP-relative stack accesses. */
7174 switch (recog_memoized (insn
))
7176 case CODE_FOR_prologue_allocate_stack
:
7177 case CODE_FOR_prologue_allocate_stack_pr
:
7178 case CODE_FOR_epilogue_deallocate_stack
:
7179 case CODE_FOR_epilogue_deallocate_stack_pr
:
7180 ret
= XVECEXP (x
, 0, 0);
7184 ret
= single_set_2 (insn
, x
);
7191 /* Adjust the cost of a scheduling dependency.
7192 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7193 COST is the current cost, DW is dependency weakness. */
7195 ia64_adjust_cost (rtx_insn
*insn
, int dep_type1
, rtx_insn
*dep_insn
,
7198 enum reg_note dep_type
= (enum reg_note
) dep_type1
;
7199 enum attr_itanium_class dep_class
;
7200 enum attr_itanium_class insn_class
;
7202 insn_class
= ia64_safe_itanium_class (insn
);
7203 dep_class
= ia64_safe_itanium_class (dep_insn
);
7205 /* Treat true memory dependencies separately. Ignore apparent true
7206 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7207 if (dep_type
== REG_DEP_TRUE
7208 && (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
)
7209 && (insn_class
== ITANIUM_CLASS_BR
|| insn_class
== ITANIUM_CLASS_SCALL
))
7212 if (dw
== MIN_DEP_WEAK
)
7213 /* Store and load are likely to alias, use higher cost to avoid stall. */
7214 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST
);
7215 else if (dw
> MIN_DEP_WEAK
)
7217 /* Store and load are less likely to alias. */
7218 if (mflag_sched_fp_mem_deps_zero_cost
&& dep_class
== ITANIUM_CLASS_STF
)
7219 /* Assume there will be no cache conflict for floating-point data.
7220 For integer data, L1 conflict penalty is huge (17 cycles), so we
7221 never assume it will not cause a conflict. */
7227 if (dep_type
!= REG_DEP_OUTPUT
)
7230 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
7231 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
7237 /* Like emit_insn_before, but skip cycle_display notes.
7238 ??? When cycle display notes are implemented, update this. */
7241 ia64_emit_insn_before (rtx insn
, rtx before
)
7243 emit_insn_before (insn
, before
);
7246 /* The following function marks insns who produce addresses for load
7247 and store insns. Such insns will be placed into M slots because it
7248 decrease latency time for Itanium1 (see function
7249 `ia64_produce_address_p' and the DFA descriptions). */
7252 ia64_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
7254 rtx_insn
*insn
, *next
, *next_tail
;
7256 /* Before reload, which_alternative is not set, which means that
7257 ia64_safe_itanium_class will produce wrong results for (at least)
7258 move instructions. */
7259 if (!reload_completed
)
7262 next_tail
= NEXT_INSN (tail
);
7263 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
7266 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
7268 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
7270 sd_iterator_def sd_it
;
7272 bool has_mem_op_consumer_p
= false;
7274 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
7276 enum attr_itanium_class c
;
7278 if (DEP_TYPE (dep
) != REG_DEP_TRUE
)
7281 next
= DEP_CON (dep
);
7282 c
= ia64_safe_itanium_class (next
);
7283 if ((c
== ITANIUM_CLASS_ST
7284 || c
== ITANIUM_CLASS_STF
)
7285 && ia64_st_address_bypass_p (insn
, next
))
7287 has_mem_op_consumer_p
= true;
7290 else if ((c
== ITANIUM_CLASS_LD
7291 || c
== ITANIUM_CLASS_FLD
7292 || c
== ITANIUM_CLASS_FLDP
)
7293 && ia64_ld_address_bypass_p (insn
, next
))
7295 has_mem_op_consumer_p
= true;
7300 insn
->call
= has_mem_op_consumer_p
;
7304 /* We're beginning a new block. Initialize data structures as necessary. */
7307 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
7308 int sched_verbose ATTRIBUTE_UNUSED
,
7309 int max_ready ATTRIBUTE_UNUSED
)
7311 if (flag_checking
&& !sel_sched_p () && reload_completed
)
7313 for (rtx_insn
*insn
= NEXT_INSN (current_sched_info
->prev_head
);
7314 insn
!= current_sched_info
->next_tail
;
7315 insn
= NEXT_INSN (insn
))
7316 gcc_assert (!SCHED_GROUP_P (insn
));
7318 last_scheduled_insn
= NULL
;
7319 init_insn_group_barriers ();
7322 memset (mem_ops_in_group
, 0, sizeof (mem_ops_in_group
));
7325 /* We're beginning a scheduling pass. Check assertion. */
7328 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
7329 int sched_verbose ATTRIBUTE_UNUSED
,
7330 int max_ready ATTRIBUTE_UNUSED
)
7332 gcc_assert (pending_data_specs
== 0);
7335 /* Scheduling pass is now finished. Free/reset static variable. */
7337 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
7338 int sched_verbose ATTRIBUTE_UNUSED
)
7340 gcc_assert (pending_data_specs
== 0);
7343 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7344 speculation check), FALSE otherwise. */
7346 is_load_p (rtx_insn
*insn
)
7348 enum attr_itanium_class insn_class
= ia64_safe_itanium_class (insn
);
7351 ((insn_class
== ITANIUM_CLASS_LD
|| insn_class
== ITANIUM_CLASS_FLD
)
7352 && get_attr_check_load (insn
) == CHECK_LOAD_NO
);
7355 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7356 (taking account for 3-cycle cache reference postponing for stores: Intel
7357 Itanium 2 Reference Manual for Software Development and Optimization,
7360 record_memory_reference (rtx_insn
*insn
)
7362 enum attr_itanium_class insn_class
= ia64_safe_itanium_class (insn
);
7364 switch (insn_class
) {
7365 case ITANIUM_CLASS_FLD
:
7366 case ITANIUM_CLASS_LD
:
7367 mem_ops_in_group
[current_cycle
% 4]++;
7369 case ITANIUM_CLASS_STF
:
7370 case ITANIUM_CLASS_ST
:
7371 mem_ops_in_group
[(current_cycle
+ 3) % 4]++;
7377 /* We are about to being issuing insns for this clock cycle.
7378 Override the default sort algorithm to better slot instructions. */
7381 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
7382 int *pn_ready
, int clock_var
,
7386 int n_ready
= *pn_ready
;
7387 rtx_insn
**e_ready
= ready
+ n_ready
;
7391 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
7393 if (reorder_type
== 0)
7395 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7397 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7398 if (insnp
< e_ready
)
7400 rtx_insn
*insn
= *insnp
;
7401 enum attr_type t
= ia64_safe_type (insn
);
7402 if (t
== TYPE_UNKNOWN
)
7404 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
7405 || asm_noperands (PATTERN (insn
)) >= 0)
7407 rtx_insn
*lowest
= ready
[n_asms
];
7408 ready
[n_asms
] = insn
;
7414 rtx_insn
*highest
= ready
[n_ready
- 1];
7415 ready
[n_ready
- 1] = insn
;
7422 if (n_asms
< n_ready
)
7424 /* Some normal insns to process. Skip the asms. */
7428 else if (n_ready
> 0)
7432 if (ia64_final_schedule
)
7435 int nr_need_stop
= 0;
7437 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
7438 if (safe_group_barrier_needed (*insnp
))
7441 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
7443 if (reorder_type
== 0)
7446 /* Move down everything that needs a stop bit, preserving
7448 while (insnp
-- > ready
+ deleted
)
7449 while (insnp
>= ready
+ deleted
)
7451 rtx_insn
*insn
= *insnp
;
7452 if (! safe_group_barrier_needed (insn
))
7454 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7462 current_cycle
= clock_var
;
7463 if (reload_completed
&& mem_ops_in_group
[clock_var
% 4] >= ia64_max_memory_insns
)
7468 /* Move down loads/stores, preserving relative order. */
7469 while (insnp
-- > ready
+ moved
)
7470 while (insnp
>= ready
+ moved
)
7472 rtx_insn
*insn
= *insnp
;
7473 if (! is_load_p (insn
))
7475 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
7486 /* We are about to being issuing insns for this clock cycle. Override
7487 the default sort algorithm to better slot instructions. */
7490 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
7491 int *pn_ready
, int clock_var
)
7493 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
7494 pn_ready
, clock_var
, 0);
7497 /* Like ia64_sched_reorder, but called after issuing each insn.
7498 Override the default sort algorithm to better slot instructions. */
7501 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
7502 int sched_verbose ATTRIBUTE_UNUSED
, rtx_insn
**ready
,
7503 int *pn_ready
, int clock_var
)
7505 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
7509 /* We are about to issue INSN. Return the number of insns left on the
7510 ready queue that can be issued this cycle. */
7513 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
7514 int sched_verbose ATTRIBUTE_UNUSED
,
7516 int can_issue_more ATTRIBUTE_UNUSED
)
7518 if (sched_deps_info
->generate_spec_deps
&& !sel_sched_p ())
7519 /* Modulo scheduling does not extend h_i_d when emitting
7520 new instructions. Don't use h_i_d, if we don't have to. */
7522 if (DONE_SPEC (insn
) & BEGIN_DATA
)
7523 pending_data_specs
++;
7524 if (CHECK_SPEC (insn
) & BEGIN_DATA
)
7525 pending_data_specs
--;
7528 if (DEBUG_INSN_P (insn
))
7531 last_scheduled_insn
= insn
;
7532 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
7533 if (reload_completed
)
7535 int needed
= group_barrier_needed (insn
);
7537 gcc_assert (!needed
);
7539 init_insn_group_barriers ();
7540 stops_p
[INSN_UID (insn
)] = stop_before_p
;
7543 record_memory_reference (insn
);
7548 /* We are choosing insn from the ready queue. Return zero if INSN
7552 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
7554 gcc_assert (insn
&& INSN_P (insn
));
7556 /* Size of ALAT is 32. As far as we perform conservative
7557 data speculation, we keep ALAT half-empty. */
7558 if (pending_data_specs
>= 16 && (TODO_SPEC (insn
) & BEGIN_DATA
))
7559 return ready_index
== 0 ? -1 : 1;
7561 if (ready_index
== 0)
7564 if ((!reload_completed
7565 || !safe_group_barrier_needed (insn
))
7566 && (!mflag_sched_mem_insns_hard_limit
7567 || !is_load_p (insn
)
7568 || mem_ops_in_group
[current_cycle
% 4] < ia64_max_memory_insns
))
7574 /* The following variable value is pseudo-insn used by the DFA insn
7575 scheduler to change the DFA state when the simulated clock is
7578 static rtx_insn
*dfa_pre_cycle_insn
;
7580 /* Returns 1 when a meaningful insn was scheduled between the last group
7581 barrier and LAST. */
7583 scheduled_good_insn (rtx_insn
*last
)
7585 if (last
&& recog_memoized (last
) >= 0)
7589 last
!= NULL
&& !NOTE_INSN_BASIC_BLOCK_P (last
)
7590 && !stops_p
[INSN_UID (last
)];
7591 last
= PREV_INSN (last
))
7592 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7593 the ebb we're scheduling. */
7594 if (INSN_P (last
) && recog_memoized (last
) >= 0)
7600 /* We are about to being issuing INSN. Return nonzero if we cannot
7601 issue it on given cycle CLOCK and return zero if we should not sort
7602 the ready queue on the next clock start. */
7605 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx_insn
*insn
, int last_clock
,
7606 int clock
, int *sort_p
)
7608 gcc_assert (insn
&& INSN_P (insn
));
7610 if (DEBUG_INSN_P (insn
))
7613 /* When a group barrier is needed for insn, last_scheduled_insn
7615 gcc_assert (!(reload_completed
&& safe_group_barrier_needed (insn
))
7616 || last_scheduled_insn
);
7618 if ((reload_completed
7619 && (safe_group_barrier_needed (insn
)
7620 || (mflag_sched_stop_bits_after_every_cycle
7621 && last_clock
!= clock
7622 && last_scheduled_insn
7623 && scheduled_good_insn (last_scheduled_insn
))))
7624 || (last_scheduled_insn
7625 && (CALL_P (last_scheduled_insn
)
7626 || unknown_for_bundling_p (last_scheduled_insn
))))
7628 init_insn_group_barriers ();
7630 if (verbose
&& dump
)
7631 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
7632 last_clock
== clock
? " + cycle advance" : "");
7635 current_cycle
= clock
;
7636 mem_ops_in_group
[current_cycle
% 4] = 0;
7638 if (last_clock
== clock
)
7640 state_transition (curr_state
, dfa_stop_insn
);
7641 if (TARGET_EARLY_STOP_BITS
)
7642 *sort_p
= (last_scheduled_insn
== NULL_RTX
7643 || ! CALL_P (last_scheduled_insn
));
7649 if (last_scheduled_insn
)
7651 if (unknown_for_bundling_p (last_scheduled_insn
))
7652 state_reset (curr_state
);
7655 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
7656 state_transition (curr_state
, dfa_stop_insn
);
7657 state_transition (curr_state
, dfa_pre_cycle_insn
);
7658 state_transition (curr_state
, NULL
);
7665 /* Implement targetm.sched.h_i_d_extended hook.
7666 Extend internal data structures. */
7668 ia64_h_i_d_extended (void)
7670 if (stops_p
!= NULL
)
7672 int new_clocks_length
= get_max_uid () * 3 / 2;
7673 stops_p
= (char *) xrecalloc (stops_p
, new_clocks_length
, clocks_length
, 1);
7674 clocks_length
= new_clocks_length
;
7679 /* This structure describes the data used by the backend to guide scheduling.
7680 When the current scheduling point is switched, this data should be saved
7681 and restored later, if the scheduler returns to this point. */
7682 struct _ia64_sched_context
7684 state_t prev_cycle_state
;
7685 rtx_insn
*last_scheduled_insn
;
7686 struct reg_write_state rws_sum
[NUM_REGS
];
7687 struct reg_write_state rws_insn
[NUM_REGS
];
7688 int first_instruction
;
7689 int pending_data_specs
;
7691 char mem_ops_in_group
[4];
7693 typedef struct _ia64_sched_context
*ia64_sched_context_t
;
7695 /* Allocates a scheduling context. */
7697 ia64_alloc_sched_context (void)
7699 return xmalloc (sizeof (struct _ia64_sched_context
));
7702 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7703 the global context otherwise. */
7705 ia64_init_sched_context (void *_sc
, bool clean_p
)
7707 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7709 sc
->prev_cycle_state
= xmalloc (dfa_state_size
);
7712 state_reset (sc
->prev_cycle_state
);
7713 sc
->last_scheduled_insn
= NULL
;
7714 memset (sc
->rws_sum
, 0, sizeof (rws_sum
));
7715 memset (sc
->rws_insn
, 0, sizeof (rws_insn
));
7716 sc
->first_instruction
= 1;
7717 sc
->pending_data_specs
= 0;
7718 sc
->current_cycle
= 0;
7719 memset (sc
->mem_ops_in_group
, 0, sizeof (mem_ops_in_group
));
7723 memcpy (sc
->prev_cycle_state
, prev_cycle_state
, dfa_state_size
);
7724 sc
->last_scheduled_insn
= last_scheduled_insn
;
7725 memcpy (sc
->rws_sum
, rws_sum
, sizeof (rws_sum
));
7726 memcpy (sc
->rws_insn
, rws_insn
, sizeof (rws_insn
));
7727 sc
->first_instruction
= first_instruction
;
7728 sc
->pending_data_specs
= pending_data_specs
;
7729 sc
->current_cycle
= current_cycle
;
7730 memcpy (sc
->mem_ops_in_group
, mem_ops_in_group
, sizeof (mem_ops_in_group
));
7734 /* Sets the global scheduling context to the one pointed to by _SC. */
7736 ia64_set_sched_context (void *_sc
)
7738 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7740 gcc_assert (sc
!= NULL
);
7742 memcpy (prev_cycle_state
, sc
->prev_cycle_state
, dfa_state_size
);
7743 last_scheduled_insn
= sc
->last_scheduled_insn
;
7744 memcpy (rws_sum
, sc
->rws_sum
, sizeof (rws_sum
));
7745 memcpy (rws_insn
, sc
->rws_insn
, sizeof (rws_insn
));
7746 first_instruction
= sc
->first_instruction
;
7747 pending_data_specs
= sc
->pending_data_specs
;
7748 current_cycle
= sc
->current_cycle
;
7749 memcpy (mem_ops_in_group
, sc
->mem_ops_in_group
, sizeof (mem_ops_in_group
));
7752 /* Clears the data in the _SC scheduling context. */
7754 ia64_clear_sched_context (void *_sc
)
7756 ia64_sched_context_t sc
= (ia64_sched_context_t
) _sc
;
7758 free (sc
->prev_cycle_state
);
7759 sc
->prev_cycle_state
= NULL
;
7762 /* Frees the _SC scheduling context. */
7764 ia64_free_sched_context (void *_sc
)
7766 gcc_assert (_sc
!= NULL
);
7771 typedef rtx (* gen_func_t
) (rtx
, rtx
);
7773 /* Return a function that will generate a load of mode MODE_NO
7774 with speculation types TS. */
7776 get_spec_load_gen_function (ds_t ts
, int mode_no
)
7778 static gen_func_t gen_ld_
[] = {
7788 gen_zero_extendqidi2
,
7789 gen_zero_extendhidi2
,
7790 gen_zero_extendsidi2
,
7793 static gen_func_t gen_ld_a
[] = {
7803 gen_zero_extendqidi2_advanced
,
7804 gen_zero_extendhidi2_advanced
,
7805 gen_zero_extendsidi2_advanced
,
7807 static gen_func_t gen_ld_s
[] = {
7808 gen_movbi_speculative
,
7809 gen_movqi_speculative
,
7810 gen_movhi_speculative
,
7811 gen_movsi_speculative
,
7812 gen_movdi_speculative
,
7813 gen_movsf_speculative
,
7814 gen_movdf_speculative
,
7815 gen_movxf_speculative
,
7816 gen_movti_speculative
,
7817 gen_zero_extendqidi2_speculative
,
7818 gen_zero_extendhidi2_speculative
,
7819 gen_zero_extendsidi2_speculative
,
7821 static gen_func_t gen_ld_sa
[] = {
7822 gen_movbi_speculative_advanced
,
7823 gen_movqi_speculative_advanced
,
7824 gen_movhi_speculative_advanced
,
7825 gen_movsi_speculative_advanced
,
7826 gen_movdi_speculative_advanced
,
7827 gen_movsf_speculative_advanced
,
7828 gen_movdf_speculative_advanced
,
7829 gen_movxf_speculative_advanced
,
7830 gen_movti_speculative_advanced
,
7831 gen_zero_extendqidi2_speculative_advanced
,
7832 gen_zero_extendhidi2_speculative_advanced
,
7833 gen_zero_extendsidi2_speculative_advanced
,
7835 static gen_func_t gen_ld_s_a
[] = {
7836 gen_movbi_speculative_a
,
7837 gen_movqi_speculative_a
,
7838 gen_movhi_speculative_a
,
7839 gen_movsi_speculative_a
,
7840 gen_movdi_speculative_a
,
7841 gen_movsf_speculative_a
,
7842 gen_movdf_speculative_a
,
7843 gen_movxf_speculative_a
,
7844 gen_movti_speculative_a
,
7845 gen_zero_extendqidi2_speculative_a
,
7846 gen_zero_extendhidi2_speculative_a
,
7847 gen_zero_extendsidi2_speculative_a
,
7852 if (ts
& BEGIN_DATA
)
7854 if (ts
& BEGIN_CONTROL
)
7859 else if (ts
& BEGIN_CONTROL
)
7861 if ((spec_info
->flags
& SEL_SCHED_SPEC_DONT_CHECK_CONTROL
)
7862 || ia64_needs_block_p (ts
))
7865 gen_ld
= gen_ld_s_a
;
7872 return gen_ld
[mode_no
];
7875 /* Constants that help mapping 'machine_mode' to int. */
7878 SPEC_MODE_INVALID
= -1,
7879 SPEC_MODE_FIRST
= 0,
7880 SPEC_MODE_FOR_EXTEND_FIRST
= 1,
7881 SPEC_MODE_FOR_EXTEND_LAST
= 3,
7887 /* Offset to reach ZERO_EXTEND patterns. */
7888 SPEC_GEN_EXTEND_OFFSET
= SPEC_MODE_LAST
- SPEC_MODE_FOR_EXTEND_FIRST
+ 1
7891 /* Return index of the MODE. */
7893 ia64_mode_to_int (machine_mode mode
)
7897 case BImode
: return 0; /* SPEC_MODE_FIRST */
7898 case QImode
: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7899 case HImode
: return 2;
7900 case SImode
: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7901 case DImode
: return 4;
7902 case SFmode
: return 5;
7903 case DFmode
: return 6;
7904 case XFmode
: return 7;
7906 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7907 mentioned in itanium[12].md. Predicate fp_register_operand also
7908 needs to be defined. Bottom line: better disable for now. */
7909 return SPEC_MODE_INVALID
;
7910 default: return SPEC_MODE_INVALID
;
7914 /* Provide information about speculation capabilities. */
7916 ia64_set_sched_flags (spec_info_t spec_info
)
7918 unsigned int *flags
= &(current_sched_info
->flags
);
7920 if (*flags
& SCHED_RGN
7921 || *flags
& SCHED_EBB
7922 || *flags
& SEL_SCHED
)
7926 if ((mflag_sched_br_data_spec
&& !reload_completed
&& optimize
> 0)
7927 || (mflag_sched_ar_data_spec
&& reload_completed
))
7932 && ((mflag_sched_br_in_data_spec
&& !reload_completed
)
7933 || (mflag_sched_ar_in_data_spec
&& reload_completed
)))
7937 if (mflag_sched_control_spec
7939 || reload_completed
))
7941 mask
|= BEGIN_CONTROL
;
7943 if (!sel_sched_p () && mflag_sched_in_control_spec
)
7944 mask
|= BE_IN_CONTROL
;
7947 spec_info
->mask
= mask
;
7951 *flags
|= USE_DEPS_LIST
| DO_SPECULATION
;
7953 if (mask
& BE_IN_SPEC
)
7956 spec_info
->flags
= 0;
7958 if ((mask
& CONTROL_SPEC
)
7959 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec
)
7960 spec_info
->flags
|= SEL_SCHED_SPEC_DONT_CHECK_CONTROL
;
7962 if (sched_verbose
>= 1)
7963 spec_info
->dump
= sched_dump
;
7965 spec_info
->dump
= 0;
7967 if (mflag_sched_count_spec_in_critical_path
)
7968 spec_info
->flags
|= COUNT_SPEC_IN_CRITICAL_PATH
;
7972 spec_info
->mask
= 0;
7975 /* If INSN is an appropriate load return its mode.
7976 Return -1 otherwise. */
7978 get_mode_no_for_insn (rtx_insn
*insn
)
7980 rtx reg
, mem
, mode_rtx
;
7984 extract_insn_cached (insn
);
7986 /* We use WHICH_ALTERNATIVE only after reload. This will
7987 guarantee that reload won't touch a speculative insn. */
7989 if (recog_data
.n_operands
!= 2)
7992 reg
= recog_data
.operand
[0];
7993 mem
= recog_data
.operand
[1];
7995 /* We should use MEM's mode since REG's mode in presence of
7996 ZERO_EXTEND will always be DImode. */
7997 if (get_attr_speculable1 (insn
) == SPECULABLE1_YES
)
7998 /* Process non-speculative ld. */
8000 if (!reload_completed
)
8002 /* Do not speculate into regs like ar.lc. */
8003 if (!REG_P (reg
) || AR_REGNO_P (REGNO (reg
)))
8010 rtx mem_reg
= XEXP (mem
, 0);
8012 if (!REG_P (mem_reg
))
8018 else if (get_attr_speculable2 (insn
) == SPECULABLE2_YES
)
8020 gcc_assert (REG_P (reg
) && MEM_P (mem
));
8026 else if (get_attr_data_speculative (insn
) == DATA_SPECULATIVE_YES
8027 || get_attr_control_speculative (insn
) == CONTROL_SPECULATIVE_YES
8028 || get_attr_check_load (insn
) == CHECK_LOAD_YES
)
8029 /* Process speculative ld or ld.c. */
8031 gcc_assert (REG_P (reg
) && MEM_P (mem
));
8036 enum attr_itanium_class attr_class
= get_attr_itanium_class (insn
);
8038 if (attr_class
== ITANIUM_CLASS_CHK_A
8039 || attr_class
== ITANIUM_CLASS_CHK_S_I
8040 || attr_class
== ITANIUM_CLASS_CHK_S_F
)
8047 mode_no
= ia64_mode_to_int (GET_MODE (mode_rtx
));
8049 if (mode_no
== SPEC_MODE_INVALID
)
8052 extend_p
= (GET_MODE (reg
) != GET_MODE (mode_rtx
));
8056 if (!(SPEC_MODE_FOR_EXTEND_FIRST
<= mode_no
8057 && mode_no
<= SPEC_MODE_FOR_EXTEND_LAST
))
8060 mode_no
+= SPEC_GEN_EXTEND_OFFSET
;
8066 /* If X is an unspec part of a speculative load, return its code.
8067 Return -1 otherwise. */
8069 get_spec_unspec_code (const_rtx x
)
8071 if (GET_CODE (x
) != UNSPEC
)
8093 /* Implement skip_rtx_p hook. */
8095 ia64_skip_rtx_p (const_rtx x
)
8097 return get_spec_unspec_code (x
) != -1;
8100 /* If INSN is a speculative load, return its UNSPEC code.
8101 Return -1 otherwise. */
8103 get_insn_spec_code (const_rtx insn
)
8107 pat
= PATTERN (insn
);
8109 if (GET_CODE (pat
) == COND_EXEC
)
8110 pat
= COND_EXEC_CODE (pat
);
8112 if (GET_CODE (pat
) != SET
)
8115 reg
= SET_DEST (pat
);
8119 mem
= SET_SRC (pat
);
8120 if (GET_CODE (mem
) == ZERO_EXTEND
)
8121 mem
= XEXP (mem
, 0);
8123 return get_spec_unspec_code (mem
);
8126 /* If INSN is a speculative load, return a ds with the speculation types.
8127 Otherwise [if INSN is a normal instruction] return 0. */
8129 ia64_get_insn_spec_ds (rtx_insn
*insn
)
8131 int code
= get_insn_spec_code (insn
);
8140 return BEGIN_CONTROL
;
8143 return BEGIN_DATA
| BEGIN_CONTROL
;
8150 /* If INSN is a speculative load return a ds with the speculation types that
8152 Otherwise [if INSN is a normal instruction] return 0. */
8154 ia64_get_insn_checked_ds (rtx_insn
*insn
)
8156 int code
= get_insn_spec_code (insn
);
8161 return BEGIN_DATA
| BEGIN_CONTROL
;
8164 return BEGIN_CONTROL
;
8168 return BEGIN_DATA
| BEGIN_CONTROL
;
8175 /* If GEN_P is true, calculate the index of needed speculation check and return
8176 speculative pattern for INSN with speculative mode TS, machine mode
8177 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
8178 If GEN_P is false, just calculate the index of needed speculation check. */
8180 ia64_gen_spec_load (rtx insn
, ds_t ts
, int mode_no
)
8183 gen_func_t gen_load
;
8185 gen_load
= get_spec_load_gen_function (ts
, mode_no
);
8187 new_pat
= gen_load (copy_rtx (recog_data
.operand
[0]),
8188 copy_rtx (recog_data
.operand
[1]));
8190 pat
= PATTERN (insn
);
8191 if (GET_CODE (pat
) == COND_EXEC
)
8192 new_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
8199 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED
,
8200 ds_t ds ATTRIBUTE_UNUSED
)
8205 /* Implement targetm.sched.speculate_insn hook.
8206 Check if the INSN can be TS speculative.
8207 If 'no' - return -1.
8208 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8209 If current pattern of the INSN already provides TS speculation,
8212 ia64_speculate_insn (rtx_insn
*insn
, ds_t ts
, rtx
*new_pat
)
8217 gcc_assert (!(ts
& ~SPECULATIVE
));
8219 if (ia64_spec_check_p (insn
))
8222 if ((ts
& BE_IN_SPEC
)
8223 && !insn_can_be_in_speculative_p (insn
, ts
))
8226 mode_no
= get_mode_no_for_insn (insn
);
8228 if (mode_no
!= SPEC_MODE_INVALID
)
8230 if (ia64_get_insn_spec_ds (insn
) == ds_get_speculation_types (ts
))
8235 *new_pat
= ia64_gen_spec_load (insn
, ts
, mode_no
);
8244 /* Return a function that will generate a check for speculation TS with mode
8246 If simple check is needed, pass true for SIMPLE_CHECK_P.
8247 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8249 get_spec_check_gen_function (ds_t ts
, int mode_no
,
8250 bool simple_check_p
, bool clearing_check_p
)
8252 static gen_func_t gen_ld_c_clr
[] = {
8262 gen_zero_extendqidi2_clr
,
8263 gen_zero_extendhidi2_clr
,
8264 gen_zero_extendsidi2_clr
,
8266 static gen_func_t gen_ld_c_nc
[] = {
8276 gen_zero_extendqidi2_nc
,
8277 gen_zero_extendhidi2_nc
,
8278 gen_zero_extendsidi2_nc
,
8280 static gen_func_t gen_chk_a_clr
[] = {
8281 gen_advanced_load_check_clr_bi
,
8282 gen_advanced_load_check_clr_qi
,
8283 gen_advanced_load_check_clr_hi
,
8284 gen_advanced_load_check_clr_si
,
8285 gen_advanced_load_check_clr_di
,
8286 gen_advanced_load_check_clr_sf
,
8287 gen_advanced_load_check_clr_df
,
8288 gen_advanced_load_check_clr_xf
,
8289 gen_advanced_load_check_clr_ti
,
8290 gen_advanced_load_check_clr_di
,
8291 gen_advanced_load_check_clr_di
,
8292 gen_advanced_load_check_clr_di
,
8294 static gen_func_t gen_chk_a_nc
[] = {
8295 gen_advanced_load_check_nc_bi
,
8296 gen_advanced_load_check_nc_qi
,
8297 gen_advanced_load_check_nc_hi
,
8298 gen_advanced_load_check_nc_si
,
8299 gen_advanced_load_check_nc_di
,
8300 gen_advanced_load_check_nc_sf
,
8301 gen_advanced_load_check_nc_df
,
8302 gen_advanced_load_check_nc_xf
,
8303 gen_advanced_load_check_nc_ti
,
8304 gen_advanced_load_check_nc_di
,
8305 gen_advanced_load_check_nc_di
,
8306 gen_advanced_load_check_nc_di
,
8308 static gen_func_t gen_chk_s
[] = {
8309 gen_speculation_check_bi
,
8310 gen_speculation_check_qi
,
8311 gen_speculation_check_hi
,
8312 gen_speculation_check_si
,
8313 gen_speculation_check_di
,
8314 gen_speculation_check_sf
,
8315 gen_speculation_check_df
,
8316 gen_speculation_check_xf
,
8317 gen_speculation_check_ti
,
8318 gen_speculation_check_di
,
8319 gen_speculation_check_di
,
8320 gen_speculation_check_di
,
8323 gen_func_t
*gen_check
;
8325 if (ts
& BEGIN_DATA
)
8327 /* We don't need recovery because even if this is ld.sa
8328 ALAT entry will be allocated only if NAT bit is set to zero.
8329 So it is enough to use ld.c here. */
8333 gcc_assert (mflag_sched_spec_ldc
);
8335 if (clearing_check_p
)
8336 gen_check
= gen_ld_c_clr
;
8338 gen_check
= gen_ld_c_nc
;
8342 if (clearing_check_p
)
8343 gen_check
= gen_chk_a_clr
;
8345 gen_check
= gen_chk_a_nc
;
8348 else if (ts
& BEGIN_CONTROL
)
8351 /* We might want to use ld.sa -> ld.c instead of
8354 gcc_assert (!ia64_needs_block_p (ts
));
8356 if (clearing_check_p
)
8357 gen_check
= gen_ld_c_clr
;
8359 gen_check
= gen_ld_c_nc
;
8363 gen_check
= gen_chk_s
;
8369 gcc_assert (mode_no
>= 0);
8370 return gen_check
[mode_no
];
8373 /* Return nonzero, if INSN needs branchy recovery check. */
8375 ia64_needs_block_p (ds_t ts
)
8377 if (ts
& BEGIN_DATA
)
8378 return !mflag_sched_spec_ldc
;
8380 gcc_assert ((ts
& BEGIN_CONTROL
) != 0);
8382 return !(mflag_sched_spec_control_ldc
&& mflag_sched_spec_ldc
);
8385 /* Generate (or regenerate) a recovery check for INSN. */
8387 ia64_gen_spec_check (rtx_insn
*insn
, rtx_insn
*label
, ds_t ds
)
8389 rtx op1
, pat
, check_pat
;
8390 gen_func_t gen_check
;
8393 mode_no
= get_mode_no_for_insn (insn
);
8394 gcc_assert (mode_no
>= 0);
8400 gcc_assert (!ia64_needs_block_p (ds
));
8401 op1
= copy_rtx (recog_data
.operand
[1]);
8404 gen_check
= get_spec_check_gen_function (ds
, mode_no
, label
== NULL_RTX
,
8407 check_pat
= gen_check (copy_rtx (recog_data
.operand
[0]), op1
);
8409 pat
= PATTERN (insn
);
8410 if (GET_CODE (pat
) == COND_EXEC
)
8411 check_pat
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (COND_EXEC_TEST (pat
)),
8417 /* Return nonzero, if X is branchy recovery check. */
8419 ia64_spec_check_p (rtx x
)
8422 if (GET_CODE (x
) == COND_EXEC
)
8423 x
= COND_EXEC_CODE (x
);
8424 if (GET_CODE (x
) == SET
)
8425 return ia64_spec_check_src_p (SET_SRC (x
));
8429 /* Return nonzero, if SRC belongs to recovery check. */
8431 ia64_spec_check_src_p (rtx src
)
8433 if (GET_CODE (src
) == IF_THEN_ELSE
)
8438 if (GET_CODE (t
) == NE
)
8442 if (GET_CODE (t
) == UNSPEC
)
8448 if (code
== UNSPEC_LDCCLR
8449 || code
== UNSPEC_LDCNC
8450 || code
== UNSPEC_CHKACLR
8451 || code
== UNSPEC_CHKANC
8452 || code
== UNSPEC_CHKS
)
8454 gcc_assert (code
!= 0);
8464 /* The following page contains abstract data `bundle states' which are
8465 used for bundling insns (inserting nops and template generation). */
8467 /* The following describes state of insn bundling. */
8471 /* Unique bundle state number to identify them in the debugging
8474 rtx_insn
*insn
; /* corresponding insn, NULL for the 1st and the last state */
8475 /* number nops before and after the insn */
8476 short before_nops_num
, after_nops_num
;
8477 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
8479 int cost
; /* cost of the state in cycles */
8480 int accumulated_insns_num
; /* number of all previous insns including
8481 nops. L is considered as 2 insns */
8482 int branch_deviation
; /* deviation of previous branches from 3rd slots */
8483 int middle_bundle_stops
; /* number of stop bits in the middle of bundles */
8484 struct bundle_state
*next
; /* next state with the same insn_num */
8485 struct bundle_state
*originator
; /* originator (previous insn state) */
8486 /* All bundle states are in the following chain. */
8487 struct bundle_state
*allocated_states_chain
;
8488 /* The DFA State after issuing the insn and the nops. */
8492 /* The following is map insn number to the corresponding bundle state. */
8494 static struct bundle_state
**index_to_bundle_states
;
8496 /* The unique number of next bundle state. */
8498 static int bundle_states_num
;
8500 /* All allocated bundle states are in the following chain. */
8502 static struct bundle_state
*allocated_bundle_states_chain
;
8504 /* All allocated but not used bundle states are in the following
8507 static struct bundle_state
*free_bundle_state_chain
;
8510 /* The following function returns a free bundle state. */
8512 static struct bundle_state
*
8513 get_free_bundle_state (void)
8515 struct bundle_state
*result
;
8517 if (free_bundle_state_chain
!= NULL
)
8519 result
= free_bundle_state_chain
;
8520 free_bundle_state_chain
= result
->next
;
8524 result
= XNEW (struct bundle_state
);
8525 result
->dfa_state
= xmalloc (dfa_state_size
);
8526 result
->allocated_states_chain
= allocated_bundle_states_chain
;
8527 allocated_bundle_states_chain
= result
;
8529 result
->unique_num
= bundle_states_num
++;
8534 /* The following function frees given bundle state. */
8537 free_bundle_state (struct bundle_state
*state
)
8539 state
->next
= free_bundle_state_chain
;
8540 free_bundle_state_chain
= state
;
8543 /* Start work with abstract data `bundle states'. */
8546 initiate_bundle_states (void)
8548 bundle_states_num
= 0;
8549 free_bundle_state_chain
= NULL
;
8550 allocated_bundle_states_chain
= NULL
;
8553 /* Finish work with abstract data `bundle states'. */
8556 finish_bundle_states (void)
8558 struct bundle_state
*curr_state
, *next_state
;
8560 for (curr_state
= allocated_bundle_states_chain
;
8562 curr_state
= next_state
)
8564 next_state
= curr_state
->allocated_states_chain
;
8565 free (curr_state
->dfa_state
);
8570 /* Hashtable helpers. */
8572 struct bundle_state_hasher
: nofree_ptr_hash
<bundle_state
>
8574 static inline hashval_t
hash (const bundle_state
*);
8575 static inline bool equal (const bundle_state
*, const bundle_state
*);
8578 /* The function returns hash of BUNDLE_STATE. */
8581 bundle_state_hasher::hash (const bundle_state
*state
)
8585 for (result
= i
= 0; i
< dfa_state_size
; i
++)
8586 result
+= (((unsigned char *) state
->dfa_state
) [i
]
8587 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
8588 return result
+ state
->insn_num
;
8591 /* The function returns nonzero if the bundle state keys are equal. */
8594 bundle_state_hasher::equal (const bundle_state
*state1
,
8595 const bundle_state
*state2
)
8597 return (state1
->insn_num
== state2
->insn_num
8598 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
8599 dfa_state_size
) == 0);
8602 /* Hash table of the bundle states. The key is dfa_state and insn_num
8603 of the bundle states. */
8605 static hash_table
<bundle_state_hasher
> *bundle_state_table
;
8607 /* The function inserts the BUNDLE_STATE into the hash table. The
8608 function returns nonzero if the bundle has been inserted into the
8609 table. The table contains the best bundle state with given key. */
8612 insert_bundle_state (struct bundle_state
*bundle_state
)
8614 struct bundle_state
**entry_ptr
;
8616 entry_ptr
= bundle_state_table
->find_slot (bundle_state
, INSERT
);
8617 if (*entry_ptr
== NULL
)
8619 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
8620 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
8621 *entry_ptr
= bundle_state
;
8624 else if (bundle_state
->cost
< (*entry_ptr
)->cost
8625 || (bundle_state
->cost
== (*entry_ptr
)->cost
8626 && ((*entry_ptr
)->accumulated_insns_num
8627 > bundle_state
->accumulated_insns_num
8628 || ((*entry_ptr
)->accumulated_insns_num
8629 == bundle_state
->accumulated_insns_num
8630 && ((*entry_ptr
)->branch_deviation
8631 > bundle_state
->branch_deviation
8632 || ((*entry_ptr
)->branch_deviation
8633 == bundle_state
->branch_deviation
8634 && (*entry_ptr
)->middle_bundle_stops
8635 > bundle_state
->middle_bundle_stops
))))))
8638 struct bundle_state temp
;
8641 **entry_ptr
= *bundle_state
;
8642 (*entry_ptr
)->next
= temp
.next
;
8643 *bundle_state
= temp
;
8648 /* Start work with the hash table. */
8651 initiate_bundle_state_table (void)
8653 bundle_state_table
= new hash_table
<bundle_state_hasher
> (50);
8656 /* Finish work with the hash table. */
8659 finish_bundle_state_table (void)
8661 delete bundle_state_table
;
8662 bundle_state_table
= NULL
;
8667 /* The following variable is a insn `nop' used to check bundle states
8668 with different number of inserted nops. */
8670 static rtx_insn
*ia64_nop
;
8672 /* The following function tries to issue NOPS_NUM nops for the current
8673 state without advancing processor cycle. If it failed, the
8674 function returns FALSE and frees the current state. */
8677 try_issue_nops (struct bundle_state
*curr_state
, int nops_num
)
8681 for (i
= 0; i
< nops_num
; i
++)
8682 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
8684 free_bundle_state (curr_state
);
8690 /* The following function tries to issue INSN for the current
8691 state without advancing processor cycle. If it failed, the
8692 function returns FALSE and frees the current state. */
8695 try_issue_insn (struct bundle_state
*curr_state
, rtx insn
)
8697 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
8699 free_bundle_state (curr_state
);
8705 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8706 starting with ORIGINATOR without advancing processor cycle. If
8707 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8708 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8709 If it was successful, the function creates new bundle state and
8710 insert into the hash table and into `index_to_bundle_states'. */
8713 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
8714 rtx_insn
*insn
, int try_bundle_end_p
,
8715 int only_bundle_end_p
)
8717 struct bundle_state
*curr_state
;
8719 curr_state
= get_free_bundle_state ();
8720 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
8721 curr_state
->insn
= insn
;
8722 curr_state
->insn_num
= originator
->insn_num
+ 1;
8723 curr_state
->cost
= originator
->cost
;
8724 curr_state
->originator
= originator
;
8725 curr_state
->before_nops_num
= before_nops_num
;
8726 curr_state
->after_nops_num
= 0;
8727 curr_state
->accumulated_insns_num
8728 = originator
->accumulated_insns_num
+ before_nops_num
;
8729 curr_state
->branch_deviation
= originator
->branch_deviation
;
8730 curr_state
->middle_bundle_stops
= originator
->middle_bundle_stops
;
8732 if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
8734 gcc_assert (GET_MODE (insn
) != TImode
);
8735 if (!try_issue_nops (curr_state
, before_nops_num
))
8737 if (!try_issue_insn (curr_state
, insn
))
8739 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
8740 if (curr_state
->accumulated_insns_num
% 3 != 0)
8741 curr_state
->middle_bundle_stops
++;
8742 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
8743 && curr_state
->accumulated_insns_num
% 3 != 0)
8745 free_bundle_state (curr_state
);
8749 else if (GET_MODE (insn
) != TImode
)
8751 if (!try_issue_nops (curr_state
, before_nops_num
))
8753 if (!try_issue_insn (curr_state
, insn
))
8755 curr_state
->accumulated_insns_num
++;
8756 gcc_assert (!unknown_for_bundling_p (insn
));
8758 if (ia64_safe_type (insn
) == TYPE_L
)
8759 curr_state
->accumulated_insns_num
++;
8763 /* If this is an insn that must be first in a group, then don't allow
8764 nops to be emitted before it. Currently, alloc is the only such
8765 supported instruction. */
8766 /* ??? The bundling automatons should handle this for us, but they do
8767 not yet have support for the first_insn attribute. */
8768 if (before_nops_num
> 0 && get_attr_first_insn (insn
) == FIRST_INSN_YES
)
8770 free_bundle_state (curr_state
);
8774 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
8775 state_transition (curr_state
->dfa_state
, NULL
);
8777 if (!try_issue_nops (curr_state
, before_nops_num
))
8779 if (!try_issue_insn (curr_state
, insn
))
8781 curr_state
->accumulated_insns_num
++;
8782 if (unknown_for_bundling_p (insn
))
8784 /* Finish bundle containing asm insn. */
8785 curr_state
->after_nops_num
8786 = 3 - curr_state
->accumulated_insns_num
% 3;
8787 curr_state
->accumulated_insns_num
8788 += 3 - curr_state
->accumulated_insns_num
% 3;
8790 else if (ia64_safe_type (insn
) == TYPE_L
)
8791 curr_state
->accumulated_insns_num
++;
8793 if (ia64_safe_type (insn
) == TYPE_B
)
8794 curr_state
->branch_deviation
8795 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
8796 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
8798 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
8801 struct bundle_state
*curr_state1
;
8802 struct bundle_state
*allocated_states_chain
;
8804 curr_state1
= get_free_bundle_state ();
8805 dfa_state
= curr_state1
->dfa_state
;
8806 allocated_states_chain
= curr_state1
->allocated_states_chain
;
8807 *curr_state1
= *curr_state
;
8808 curr_state1
->dfa_state
= dfa_state
;
8809 curr_state1
->allocated_states_chain
= allocated_states_chain
;
8810 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
8812 curr_state
= curr_state1
;
8814 if (!try_issue_nops (curr_state
,
8815 3 - curr_state
->accumulated_insns_num
% 3))
8817 curr_state
->after_nops_num
8818 = 3 - curr_state
->accumulated_insns_num
% 3;
8819 curr_state
->accumulated_insns_num
8820 += 3 - curr_state
->accumulated_insns_num
% 3;
8822 if (!insert_bundle_state (curr_state
))
8823 free_bundle_state (curr_state
);
8827 /* The following function returns position in the two window bundle
8831 get_max_pos (state_t state
)
8833 if (cpu_unit_reservation_p (state
, pos_6
))
8835 else if (cpu_unit_reservation_p (state
, pos_5
))
8837 else if (cpu_unit_reservation_p (state
, pos_4
))
8839 else if (cpu_unit_reservation_p (state
, pos_3
))
8841 else if (cpu_unit_reservation_p (state
, pos_2
))
8843 else if (cpu_unit_reservation_p (state
, pos_1
))
8849 /* The function returns code of a possible template for given position
8850 and state. The function should be called only with 2 values of
8851 position equal to 3 or 6. We avoid generating F NOPs by putting
8852 templates containing F insns at the end of the template search
8853 because undocumented anomaly in McKinley derived cores which can
8854 cause stalls if an F-unit insn (including a NOP) is issued within a
8855 six-cycle window after reading certain application registers (such
8856 as ar.bsp). Furthermore, power-considerations also argue against
8857 the use of F-unit instructions unless they're really needed. */
8860 get_template (state_t state
, int pos
)
8865 if (cpu_unit_reservation_p (state
, _0mmi_
))
8867 else if (cpu_unit_reservation_p (state
, _0mii_
))
8869 else if (cpu_unit_reservation_p (state
, _0mmb_
))
8871 else if (cpu_unit_reservation_p (state
, _0mib_
))
8873 else if (cpu_unit_reservation_p (state
, _0mbb_
))
8875 else if (cpu_unit_reservation_p (state
, _0bbb_
))
8877 else if (cpu_unit_reservation_p (state
, _0mmf_
))
8879 else if (cpu_unit_reservation_p (state
, _0mfi_
))
8881 else if (cpu_unit_reservation_p (state
, _0mfb_
))
8883 else if (cpu_unit_reservation_p (state
, _0mlx_
))
8888 if (cpu_unit_reservation_p (state
, _1mmi_
))
8890 else if (cpu_unit_reservation_p (state
, _1mii_
))
8892 else if (cpu_unit_reservation_p (state
, _1mmb_
))
8894 else if (cpu_unit_reservation_p (state
, _1mib_
))
8896 else if (cpu_unit_reservation_p (state
, _1mbb_
))
8898 else if (cpu_unit_reservation_p (state
, _1bbb_
))
8900 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
8902 else if (cpu_unit_reservation_p (state
, _1mfi_
))
8904 else if (cpu_unit_reservation_p (state
, _1mfb_
))
8906 else if (cpu_unit_reservation_p (state
, _1mlx_
))
8915 /* True when INSN is important for bundling. */
8918 important_for_bundling_p (rtx_insn
*insn
)
8920 return (INSN_P (insn
)
8921 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
8922 && GET_CODE (PATTERN (insn
)) != USE
8923 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
8926 /* The following function returns an insn important for insn bundling
8927 followed by INSN and before TAIL. */
8930 get_next_important_insn (rtx_insn
*insn
, rtx_insn
*tail
)
8932 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
8933 if (important_for_bundling_p (insn
))
8938 /* True when INSN is unknown, but important, for bundling. */
8941 unknown_for_bundling_p (rtx_insn
*insn
)
8943 return (INSN_P (insn
)
8944 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_UNKNOWN
8945 && GET_CODE (PATTERN (insn
)) != USE
8946 && GET_CODE (PATTERN (insn
)) != CLOBBER
);
8949 /* Add a bundle selector TEMPLATE0 before INSN. */
8952 ia64_add_bundle_selector_before (int template0
, rtx_insn
*insn
)
8954 rtx b
= gen_bundle_selector (GEN_INT (template0
));
8956 ia64_emit_insn_before (b
, insn
);
8957 #if NR_BUNDLES == 10
8958 if ((template0
== 4 || template0
== 5)
8959 && ia64_except_unwind_info (&global_options
) == UI_TARGET
)
8962 rtx note
= NULL_RTX
;
8964 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8965 first or second slot. If it is and has REG_EH_NOTE set, copy it
8966 to following nops, as br.call sets rp to the address of following
8967 bundle and therefore an EH region end must be on a bundle
8969 insn
= PREV_INSN (insn
);
8970 for (i
= 0; i
< 3; i
++)
8973 insn
= next_active_insn (insn
);
8974 while (NONJUMP_INSN_P (insn
)
8975 && get_attr_empty (insn
) == EMPTY_YES
);
8977 note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
8982 gcc_assert ((code
= recog_memoized (insn
)) == CODE_FOR_nop
8983 || code
== CODE_FOR_nop_b
);
8984 if (find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
))
8987 add_reg_note (insn
, REG_EH_REGION
, XEXP (note
, 0));
8994 /* The following function does insn bundling. Bundling means
8995 inserting templates and nop insns to fit insn groups into permitted
8996 templates. Instruction scheduling uses NDFA (non-deterministic
8997 finite automata) encoding informations about the templates and the
8998 inserted nops. Nondeterminism of the automata permits follows
8999 all possible insn sequences very fast.
9001 Unfortunately it is not possible to get information about inserting
9002 nop insns and used templates from the automata states. The
9003 automata only says that we can issue an insn possibly inserting
9004 some nops before it and using some template. Therefore insn
9005 bundling in this function is implemented by using DFA
9006 (deterministic finite automata). We follow all possible insn
9007 sequences by inserting 0-2 nops (that is what the NDFA describe for
9008 insn scheduling) before/after each insn being bundled. We know the
9009 start of simulated processor cycle from insn scheduling (insn
9010 starting a new cycle has TImode).
9012 Simple implementation of insn bundling would create enormous
9013 number of possible insn sequences satisfying information about new
9014 cycle ticks taken from the insn scheduling. To make the algorithm
9015 practical we use dynamic programming. Each decision (about
9016 inserting nops and implicitly about previous decisions) is described
9017 by structure bundle_state (see above). If we generate the same
9018 bundle state (key is automaton state after issuing the insns and
9019 nops for it), we reuse already generated one. As consequence we
9020 reject some decisions which cannot improve the solution and
9021 reduce memory for the algorithm.
9023 When we reach the end of EBB (extended basic block), we choose the
9024 best sequence and then, moving back in EBB, insert templates for
9025 the best alternative. The templates are taken from querying
9026 automaton state for each insn in chosen bundle states.
9028 So the algorithm makes two (forward and backward) passes through
9032 bundling (FILE *dump
, int verbose
, rtx_insn
*prev_head_insn
, rtx_insn
*tail
)
9034 struct bundle_state
*curr_state
, *next_state
, *best_state
;
9035 rtx_insn
*insn
, *next_insn
;
9037 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
9038 int pos
= 0, max_pos
, template0
, template1
;
9040 enum attr_type type
;
9043 /* Count insns in the EBB. */
9044 for (insn
= NEXT_INSN (prev_head_insn
);
9045 insn
&& insn
!= tail
;
9046 insn
= NEXT_INSN (insn
))
9052 dfa_clean_insn_cache ();
9053 initiate_bundle_state_table ();
9054 index_to_bundle_states
= XNEWVEC (struct bundle_state
*, insn_num
+ 2);
9055 /* First (forward) pass -- generation of bundle states. */
9056 curr_state
= get_free_bundle_state ();
9057 curr_state
->insn
= NULL
;
9058 curr_state
->before_nops_num
= 0;
9059 curr_state
->after_nops_num
= 0;
9060 curr_state
->insn_num
= 0;
9061 curr_state
->cost
= 0;
9062 curr_state
->accumulated_insns_num
= 0;
9063 curr_state
->branch_deviation
= 0;
9064 curr_state
->middle_bundle_stops
= 0;
9065 curr_state
->next
= NULL
;
9066 curr_state
->originator
= NULL
;
9067 state_reset (curr_state
->dfa_state
);
9068 index_to_bundle_states
[0] = curr_state
;
9070 /* Shift cycle mark if it is put on insn which could be ignored. */
9071 for (insn
= NEXT_INSN (prev_head_insn
);
9073 insn
= NEXT_INSN (insn
))
9075 && !important_for_bundling_p (insn
)
9076 && GET_MODE (insn
) == TImode
)
9078 PUT_MODE (insn
, VOIDmode
);
9079 for (next_insn
= NEXT_INSN (insn
);
9081 next_insn
= NEXT_INSN (next_insn
))
9082 if (important_for_bundling_p (next_insn
)
9083 && INSN_CODE (next_insn
) != CODE_FOR_insn_group_barrier
)
9085 PUT_MODE (next_insn
, TImode
);
9089 /* Forward pass: generation of bundle states. */
9090 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
9094 gcc_assert (important_for_bundling_p (insn
));
9095 type
= ia64_safe_type (insn
);
9096 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
9098 index_to_bundle_states
[insn_num
] = NULL
;
9099 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
9101 curr_state
= next_state
)
9103 pos
= curr_state
->accumulated_insns_num
% 3;
9104 next_state
= curr_state
->next
;
9105 /* We must fill up the current bundle in order to start a
9106 subsequent asm insn in a new bundle. Asm insn is always
9107 placed in a separate bundle. */
9109 = (next_insn
!= NULL_RTX
9110 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
9111 && unknown_for_bundling_p (next_insn
));
9112 /* We may fill up the current bundle if it is the cycle end
9113 without a group barrier. */
9115 = (only_bundle_end_p
|| next_insn
== NULL_RTX
9116 || (GET_MODE (next_insn
) == TImode
9117 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
9118 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
9120 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
9122 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
9124 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
9127 gcc_assert (index_to_bundle_states
[insn_num
]);
9128 for (curr_state
= index_to_bundle_states
[insn_num
];
9130 curr_state
= curr_state
->next
)
9131 if (verbose
>= 2 && dump
)
9133 /* This structure is taken from generated code of the
9134 pipeline hazard recognizer (see file insn-attrtab.c).
9135 Please don't forget to change the structure if a new
9136 automaton is added to .md file. */
9139 unsigned short one_automaton_state
;
9140 unsigned short oneb_automaton_state
;
9141 unsigned short two_automaton_state
;
9142 unsigned short twob_automaton_state
;
9147 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9148 curr_state
->unique_num
,
9149 (curr_state
->originator
== NULL
9150 ? -1 : curr_state
->originator
->unique_num
),
9152 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
9153 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
9154 curr_state
->middle_bundle_stops
,
9155 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
9160 /* We should find a solution because the 2nd insn scheduling has
9162 gcc_assert (index_to_bundle_states
[insn_num
]);
9163 /* Find a state corresponding to the best insn sequence. */
9165 for (curr_state
= index_to_bundle_states
[insn_num
];
9167 curr_state
= curr_state
->next
)
9168 /* We are just looking at the states with fully filled up last
9169 bundle. The first we prefer insn sequences with minimal cost
9170 then with minimal inserted nops and finally with branch insns
9171 placed in the 3rd slots. */
9172 if (curr_state
->accumulated_insns_num
% 3 == 0
9173 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
9174 || (best_state
->cost
== curr_state
->cost
9175 && (curr_state
->accumulated_insns_num
9176 < best_state
->accumulated_insns_num
9177 || (curr_state
->accumulated_insns_num
9178 == best_state
->accumulated_insns_num
9179 && (curr_state
->branch_deviation
9180 < best_state
->branch_deviation
9181 || (curr_state
->branch_deviation
9182 == best_state
->branch_deviation
9183 && curr_state
->middle_bundle_stops
9184 < best_state
->middle_bundle_stops
)))))))
9185 best_state
= curr_state
;
9186 /* Second (backward) pass: adding nops and templates. */
9187 gcc_assert (best_state
);
9188 insn_num
= best_state
->before_nops_num
;
9189 template0
= template1
= -1;
9190 for (curr_state
= best_state
;
9191 curr_state
->originator
!= NULL
;
9192 curr_state
= curr_state
->originator
)
9194 insn
= curr_state
->insn
;
9195 asm_p
= unknown_for_bundling_p (insn
);
9197 if (verbose
>= 2 && dump
)
9201 unsigned short one_automaton_state
;
9202 unsigned short oneb_automaton_state
;
9203 unsigned short two_automaton_state
;
9204 unsigned short twob_automaton_state
;
9209 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9210 curr_state
->unique_num
,
9211 (curr_state
->originator
== NULL
9212 ? -1 : curr_state
->originator
->unique_num
),
9214 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
9215 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
9216 curr_state
->middle_bundle_stops
,
9217 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
9220 /* Find the position in the current bundle window. The window can
9221 contain at most two bundles. Two bundle window means that
9222 the processor will make two bundle rotation. */
9223 max_pos
= get_max_pos (curr_state
->dfa_state
);
9225 /* The following (negative template number) means that the
9226 processor did one bundle rotation. */
9227 || (max_pos
== 3 && template0
< 0))
9229 /* We are at the end of the window -- find template(s) for
9233 template0
= get_template (curr_state
->dfa_state
, 3);
9236 template1
= get_template (curr_state
->dfa_state
, 3);
9237 template0
= get_template (curr_state
->dfa_state
, 6);
9240 if (max_pos
> 3 && template1
< 0)
9241 /* It may happen when we have the stop inside a bundle. */
9243 gcc_assert (pos
<= 3);
9244 template1
= get_template (curr_state
->dfa_state
, 3);
9248 /* Emit nops after the current insn. */
9249 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
9251 rtx nop_pat
= gen_nop ();
9252 rtx_insn
*nop
= emit_insn_after (nop_pat
, insn
);
9254 gcc_assert (pos
>= 0);
9257 /* We are at the start of a bundle: emit the template
9258 (it should be defined). */
9259 gcc_assert (template0
>= 0);
9260 ia64_add_bundle_selector_before (template0
, nop
);
9261 /* If we have two bundle window, we make one bundle
9262 rotation. Otherwise template0 will be undefined
9263 (negative value). */
9264 template0
= template1
;
9268 /* Move the position backward in the window. Group barrier has
9269 no slot. Asm insn takes all bundle. */
9270 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
9271 && !unknown_for_bundling_p (insn
))
9273 /* Long insn takes 2 slots. */
9274 if (ia64_safe_type (insn
) == TYPE_L
)
9276 gcc_assert (pos
>= 0);
9278 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
9279 && !unknown_for_bundling_p (insn
))
9281 /* The current insn is at the bundle start: emit the
9283 gcc_assert (template0
>= 0);
9284 ia64_add_bundle_selector_before (template0
, insn
);
9285 b
= PREV_INSN (insn
);
9287 /* See comment above in analogous place for emitting nops
9289 template0
= template1
;
9292 /* Emit nops after the current insn. */
9293 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
9295 rtx nop_pat
= gen_nop ();
9296 ia64_emit_insn_before (nop_pat
, insn
);
9297 rtx_insn
*nop
= PREV_INSN (insn
);
9300 gcc_assert (pos
>= 0);
9303 /* See comment above in analogous place for emitting nops
9305 gcc_assert (template0
>= 0);
9306 ia64_add_bundle_selector_before (template0
, insn
);
9307 b
= PREV_INSN (insn
);
9309 template0
= template1
;
9317 /* Assert right calculation of middle_bundle_stops. */
9318 int num
= best_state
->middle_bundle_stops
;
9319 bool start_bundle
= true, end_bundle
= false;
9321 for (insn
= NEXT_INSN (prev_head_insn
);
9322 insn
&& insn
!= tail
;
9323 insn
= NEXT_INSN (insn
))
9327 if (recog_memoized (insn
) == CODE_FOR_bundle_selector
)
9328 start_bundle
= true;
9331 rtx_insn
*next_insn
;
9333 for (next_insn
= NEXT_INSN (insn
);
9334 next_insn
&& next_insn
!= tail
;
9335 next_insn
= NEXT_INSN (next_insn
))
9336 if (INSN_P (next_insn
)
9337 && (ia64_safe_itanium_class (next_insn
)
9338 != ITANIUM_CLASS_IGNORE
9339 || recog_memoized (next_insn
)
9340 == CODE_FOR_bundle_selector
)
9341 && GET_CODE (PATTERN (next_insn
)) != USE
9342 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
9345 end_bundle
= next_insn
== NULL_RTX
9346 || next_insn
== tail
9347 || (INSN_P (next_insn
)
9348 && recog_memoized (next_insn
) == CODE_FOR_bundle_selector
);
9349 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
9350 && !start_bundle
&& !end_bundle
9352 && !unknown_for_bundling_p (next_insn
))
9355 start_bundle
= false;
9359 gcc_assert (num
== 0);
9362 free (index_to_bundle_states
);
9363 finish_bundle_state_table ();
9365 dfa_clean_insn_cache ();
9368 /* The following function is called at the end of scheduling BB or
9369 EBB. After reload, it inserts stop bits and does insn bundling. */
9372 ia64_sched_finish (FILE *dump
, int sched_verbose
)
9375 fprintf (dump
, "// Finishing schedule.\n");
9376 if (!reload_completed
)
9378 if (reload_completed
)
9380 final_emit_insn_group_barriers (dump
);
9381 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
9382 current_sched_info
->next_tail
);
9383 if (sched_verbose
&& dump
)
9384 fprintf (dump
, "// finishing %d-%d\n",
9385 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
9386 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
9392 /* The following function inserts stop bits in scheduled BB or EBB. */
9395 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
9398 int need_barrier_p
= 0;
9399 int seen_good_insn
= 0;
9401 init_insn_group_barriers ();
9403 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
9404 insn
!= current_sched_info
->next_tail
;
9405 insn
= NEXT_INSN (insn
))
9407 if (BARRIER_P (insn
))
9409 rtx_insn
*last
= prev_active_insn (insn
);
9413 if (JUMP_TABLE_DATA_P (last
))
9414 last
= prev_active_insn (last
);
9415 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9416 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
9418 init_insn_group_barriers ();
9422 else if (NONDEBUG_INSN_P (insn
))
9424 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
9426 init_insn_group_barriers ();
9430 else if (need_barrier_p
|| group_barrier_needed (insn
)
9431 || (mflag_sched_stop_bits_after_every_cycle
9432 && GET_MODE (insn
) == TImode
9435 if (TARGET_EARLY_STOP_BITS
)
9440 last
!= current_sched_info
->prev_head
;
9441 last
= PREV_INSN (last
))
9442 if (INSN_P (last
) && GET_MODE (last
) == TImode
9443 && stops_p
[INSN_UID (last
)])
9445 if (last
== current_sched_info
->prev_head
)
9447 last
= prev_active_insn (last
);
9449 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9450 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9452 init_insn_group_barriers ();
9453 for (last
= NEXT_INSN (last
);
9455 last
= NEXT_INSN (last
))
9458 group_barrier_needed (last
);
9459 if (recog_memoized (last
) >= 0
9460 && important_for_bundling_p (last
))
9466 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9468 init_insn_group_barriers ();
9471 group_barrier_needed (insn
);
9472 if (recog_memoized (insn
) >= 0
9473 && important_for_bundling_p (insn
))
9476 else if (recog_memoized (insn
) >= 0
9477 && important_for_bundling_p (insn
))
9479 need_barrier_p
= (CALL_P (insn
) || unknown_for_bundling_p (insn
));
9486 /* If the following function returns TRUE, we will use the DFA
9490 ia64_first_cycle_multipass_dfa_lookahead (void)
9492 return (reload_completed
? 6 : 4);
9495 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9498 ia64_init_dfa_pre_cycle_insn (void)
9500 if (temp_dfa_state
== NULL
)
9502 dfa_state_size
= state_size ();
9503 temp_dfa_state
= xmalloc (dfa_state_size
);
9504 prev_cycle_state
= xmalloc (dfa_state_size
);
9506 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
9507 SET_PREV_INSN (dfa_pre_cycle_insn
) = SET_NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
9508 recog_memoized (dfa_pre_cycle_insn
);
9509 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9510 SET_PREV_INSN (dfa_stop_insn
) = SET_NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
9511 recog_memoized (dfa_stop_insn
);
9514 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9515 used by the DFA insn scheduler. */
9518 ia64_dfa_pre_cycle_insn (void)
9520 return dfa_pre_cycle_insn
;
9523 /* The following function returns TRUE if PRODUCER (of type ilog or
9524 ld) produces address for CONSUMER (of type st or stf). */
9527 ia64_st_address_bypass_p (rtx_insn
*producer
, rtx_insn
*consumer
)
9531 gcc_assert (producer
&& consumer
);
9532 dest
= ia64_single_set (producer
);
9534 reg
= SET_DEST (dest
);
9536 if (GET_CODE (reg
) == SUBREG
)
9537 reg
= SUBREG_REG (reg
);
9538 gcc_assert (GET_CODE (reg
) == REG
);
9540 dest
= ia64_single_set (consumer
);
9542 mem
= SET_DEST (dest
);
9543 gcc_assert (mem
&& GET_CODE (mem
) == MEM
);
9544 return reg_mentioned_p (reg
, mem
);
9547 /* The following function returns TRUE if PRODUCER (of type ilog or
9548 ld) produces address for CONSUMER (of type ld or fld). */
9551 ia64_ld_address_bypass_p (rtx_insn
*producer
, rtx_insn
*consumer
)
9553 rtx dest
, src
, reg
, mem
;
9555 gcc_assert (producer
&& consumer
);
9556 dest
= ia64_single_set (producer
);
9558 reg
= SET_DEST (dest
);
9560 if (GET_CODE (reg
) == SUBREG
)
9561 reg
= SUBREG_REG (reg
);
9562 gcc_assert (GET_CODE (reg
) == REG
);
9564 src
= ia64_single_set (consumer
);
9566 mem
= SET_SRC (src
);
9569 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
9570 mem
= XVECEXP (mem
, 0, 0);
9571 else if (GET_CODE (mem
) == IF_THEN_ELSE
)
9572 /* ??? Is this bypass necessary for ld.c? */
9574 gcc_assert (XINT (XEXP (XEXP (mem
, 0), 0), 1) == UNSPEC_LDCCLR
);
9575 mem
= XEXP (mem
, 1);
9578 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
9579 mem
= XEXP (mem
, 0);
9581 if (GET_CODE (mem
) == UNSPEC
)
9583 int c
= XINT (mem
, 1);
9585 gcc_assert (c
== UNSPEC_LDA
|| c
== UNSPEC_LDS
|| c
== UNSPEC_LDS_A
9586 || c
== UNSPEC_LDSA
);
9587 mem
= XVECEXP (mem
, 0, 0);
9590 /* Note that LO_SUM is used for GOT loads. */
9591 gcc_assert (GET_CODE (mem
) == LO_SUM
|| GET_CODE (mem
) == MEM
);
9593 return reg_mentioned_p (reg
, mem
);
9596 /* The following function returns TRUE if INSN produces address for a
9597 load/store insn. We will place such insns into M slot because it
9598 decreases its latency time. */
9601 ia64_produce_address_p (rtx insn
)
9607 /* Emit pseudo-ops for the assembler to describe predicate relations.
9608 At present this assumes that we only consider predicate pairs to
9609 be mutex, and that the assembler can deduce proper values from
9610 straight-line code. */
9613 emit_predicate_relation_info (void)
9617 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
9620 rtx_insn
*head
= BB_HEAD (bb
);
9622 /* We only need such notes at code labels. */
9623 if (! LABEL_P (head
))
9625 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head
)))
9626 head
= NEXT_INSN (head
);
9628 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9629 grabbing the entire block of predicate registers. */
9630 for (r
= PR_REG (2); r
< PR_REG (64); r
+= 2)
9631 if (REGNO_REG_SET_P (df_get_live_in (bb
), r
))
9633 rtx p
= gen_rtx_REG (BImode
, r
);
9634 rtx_insn
*n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
9635 if (head
== BB_END (bb
))
9641 /* Look for conditional calls that do not return, and protect predicate
9642 relations around them. Otherwise the assembler will assume the call
9643 returns, and complain about uses of call-clobbered predicates after
9645 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
9647 rtx_insn
*insn
= BB_HEAD (bb
);
9652 && GET_CODE (PATTERN (insn
)) == COND_EXEC
9653 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
9656 emit_insn_before (gen_safe_across_calls_all (), insn
);
9657 rtx_insn
*a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
9658 if (BB_HEAD (bb
) == insn
)
9660 if (BB_END (bb
) == insn
)
9664 if (insn
== BB_END (bb
))
9666 insn
= NEXT_INSN (insn
);
9671 /* Perform machine dependent operations on the rtl chain INSNS. */
9676 /* We are freeing block_for_insn in the toplev to keep compatibility
9677 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9678 compute_bb_for_insn ();
9680 /* If optimizing, we'll have split before scheduling. */
9684 if (optimize
&& flag_schedule_insns_after_reload
9685 && dbg_cnt (ia64_sched2
))
9688 timevar_push (TV_SCHED2
);
9689 ia64_final_schedule
= 1;
9691 /* We can't let modulo-sched prevent us from scheduling any bbs,
9692 since we need the final schedule to produce bundle information. */
9693 FOR_EACH_BB_FN (bb
, cfun
)
9694 bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
9696 initiate_bundle_states ();
9697 ia64_nop
= make_insn_raw (gen_nop ());
9698 SET_PREV_INSN (ia64_nop
) = SET_NEXT_INSN (ia64_nop
) = NULL_RTX
;
9699 recog_memoized (ia64_nop
);
9700 clocks_length
= get_max_uid () + 1;
9701 stops_p
= XCNEWVEC (char, clocks_length
);
9703 if (ia64_tune
== PROCESSOR_ITANIUM2
)
9705 pos_1
= get_cpu_unit_code ("2_1");
9706 pos_2
= get_cpu_unit_code ("2_2");
9707 pos_3
= get_cpu_unit_code ("2_3");
9708 pos_4
= get_cpu_unit_code ("2_4");
9709 pos_5
= get_cpu_unit_code ("2_5");
9710 pos_6
= get_cpu_unit_code ("2_6");
9711 _0mii_
= get_cpu_unit_code ("2b_0mii.");
9712 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
9713 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
9714 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
9715 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
9716 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
9717 _0mib_
= get_cpu_unit_code ("2b_0mib.");
9718 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
9719 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
9720 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
9721 _1mii_
= get_cpu_unit_code ("2b_1mii.");
9722 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
9723 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
9724 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
9725 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
9726 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
9727 _1mib_
= get_cpu_unit_code ("2b_1mib.");
9728 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
9729 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
9730 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
9734 pos_1
= get_cpu_unit_code ("1_1");
9735 pos_2
= get_cpu_unit_code ("1_2");
9736 pos_3
= get_cpu_unit_code ("1_3");
9737 pos_4
= get_cpu_unit_code ("1_4");
9738 pos_5
= get_cpu_unit_code ("1_5");
9739 pos_6
= get_cpu_unit_code ("1_6");
9740 _0mii_
= get_cpu_unit_code ("1b_0mii.");
9741 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
9742 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
9743 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
9744 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
9745 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
9746 _0mib_
= get_cpu_unit_code ("1b_0mib.");
9747 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
9748 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
9749 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
9750 _1mii_
= get_cpu_unit_code ("1b_1mii.");
9751 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
9752 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
9753 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
9754 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
9755 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
9756 _1mib_
= get_cpu_unit_code ("1b_1mib.");
9757 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
9758 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
9759 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
9762 if (flag_selective_scheduling2
9763 && !maybe_skip_selective_scheduling ())
9764 run_selective_scheduling ();
9768 /* Redo alignment computation, as it might gone wrong. */
9769 compute_alignments ();
9771 /* We cannot reuse this one because it has been corrupted by the
9773 finish_bundle_states ();
9776 emit_insn_group_barriers (dump_file
);
9778 ia64_final_schedule
= 0;
9779 timevar_pop (TV_SCHED2
);
9782 emit_all_insn_group_barriers (dump_file
);
9786 /* A call must not be the last instruction in a function, so that the
9787 return address is still within the function, so that unwinding works
9788 properly. Note that IA-64 differs from dwarf2 on this point. */
9789 if (ia64_except_unwind_info (&global_options
) == UI_TARGET
)
9794 insn
= get_last_insn ();
9795 if (! INSN_P (insn
))
9796 insn
= prev_active_insn (insn
);
9799 /* Skip over insns that expand to nothing. */
9800 while (NONJUMP_INSN_P (insn
)
9801 && get_attr_empty (insn
) == EMPTY_YES
)
9803 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
9804 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
9806 insn
= prev_active_insn (insn
);
9811 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9812 emit_insn (gen_break_f ());
9813 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9818 emit_predicate_relation_info ();
9820 if (flag_var_tracking
)
9822 timevar_push (TV_VAR_TRACKING
);
9823 variable_tracking_main ();
9824 timevar_pop (TV_VAR_TRACKING
);
9826 df_finish_pass (false);
9829 /* Return true if REGNO is used by the epilogue. */
9832 ia64_epilogue_uses (int regno
)
9837 /* With a call to a function in another module, we will write a new
9838 value to "gp". After returning from such a call, we need to make
9839 sure the function restores the original gp-value, even if the
9840 function itself does not use the gp anymore. */
9841 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
9843 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9844 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9845 /* For functions defined with the syscall_linkage attribute, all
9846 input registers are marked as live at all function exits. This
9847 prevents the register allocator from using the input registers,
9848 which in turn makes it possible to restart a system call after
9849 an interrupt without having to save/restore the input registers.
9850 This also prevents kernel data from leaking to application code. */
9851 return lookup_attribute ("syscall_linkage",
9852 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
9855 /* Conditional return patterns can't represent the use of `b0' as
9856 the return address, so we force the value live this way. */
9860 /* Likewise for ar.pfs, which is used by br.ret. */
9868 /* Return true if REGNO is used by the frame unwinder. */
9871 ia64_eh_uses (int regno
)
9875 if (! reload_completed
)
9881 for (r
= reg_save_b0
; r
<= reg_save_ar_lc
; r
++)
9882 if (regno
== current_frame_info
.r
[r
]
9883 || regno
== emitted_frame_related_regs
[r
])
9889 /* Return true if this goes in small data/bss. */
9891 /* ??? We could also support own long data here. Generating movl/add/ld8
9892 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9893 code faster because there is one less load. This also includes incomplete
9894 types which can't go in sdata/sbss. */
9897 ia64_in_small_data_p (const_tree exp
)
9899 if (TARGET_NO_SDATA
)
9902 /* We want to merge strings, so we never consider them small data. */
9903 if (TREE_CODE (exp
) == STRING_CST
)
9906 /* Functions are never small data. */
9907 if (TREE_CODE (exp
) == FUNCTION_DECL
)
9910 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
9912 const char *section
= DECL_SECTION_NAME (exp
);
9914 if (strcmp (section
, ".sdata") == 0
9915 || strncmp (section
, ".sdata.", 7) == 0
9916 || strncmp (section
, ".gnu.linkonce.s.", 16) == 0
9917 || strcmp (section
, ".sbss") == 0
9918 || strncmp (section
, ".sbss.", 6) == 0
9919 || strncmp (section
, ".gnu.linkonce.sb.", 17) == 0)
9924 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
9926 /* If this is an incomplete type with size 0, then we can't put it
9927 in sdata because it might be too big when completed. */
9928 if (size
> 0 && size
<= ia64_section_threshold
)
9935 /* Output assembly directives for prologue regions. */
9937 /* The current basic block number. */
9939 static bool last_block
;
9941 /* True if we need a copy_state command at the start of the next block. */
9943 static bool need_copy_state
;
9945 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9946 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9949 /* The function emits unwind directives for the start of an epilogue. */
9952 process_epilogue (FILE *asm_out_file
, rtx insn ATTRIBUTE_UNUSED
,
9953 bool unwind
, bool frame ATTRIBUTE_UNUSED
)
9955 /* If this isn't the last block of the function, then we need to label the
9956 current state, and copy it back in at the start of the next block. */
9961 fprintf (asm_out_file
, "\t.label_state %d\n",
9962 ++cfun
->machine
->state_num
);
9963 need_copy_state
= true;
9967 fprintf (asm_out_file
, "\t.restore sp\n");
9970 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
9973 process_cfa_adjust_cfa (FILE *asm_out_file
, rtx pat
, rtx insn
,
9974 bool unwind
, bool frame
)
9976 rtx dest
= SET_DEST (pat
);
9977 rtx src
= SET_SRC (pat
);
9979 if (dest
== stack_pointer_rtx
)
9981 if (GET_CODE (src
) == PLUS
)
9983 rtx op0
= XEXP (src
, 0);
9984 rtx op1
= XEXP (src
, 1);
9986 gcc_assert (op0
== dest
&& GET_CODE (op1
) == CONST_INT
);
9988 if (INTVAL (op1
) < 0)
9990 gcc_assert (!frame_pointer_needed
);
9992 fprintf (asm_out_file
,
9993 "\t.fframe " HOST_WIDE_INT_PRINT_DEC
"\n",
9997 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
10001 gcc_assert (src
== hard_frame_pointer_rtx
);
10002 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
10005 else if (dest
== hard_frame_pointer_rtx
)
10007 gcc_assert (src
== stack_pointer_rtx
);
10008 gcc_assert (frame_pointer_needed
);
10011 fprintf (asm_out_file
, "\t.vframe r%d\n",
10012 ia64_dbx_register_number (REGNO (dest
)));
10015 gcc_unreachable ();
10018 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10021 process_cfa_register (FILE *asm_out_file
, rtx pat
, bool unwind
)
10023 rtx dest
= SET_DEST (pat
);
10024 rtx src
= SET_SRC (pat
);
10025 int dest_regno
= REGNO (dest
);
10030 /* Saving return address pointer. */
10032 fprintf (asm_out_file
, "\t.save rp, r%d\n",
10033 ia64_dbx_register_number (dest_regno
));
10037 src_regno
= REGNO (src
);
10042 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_pr
]);
10044 fprintf (asm_out_file
, "\t.save pr, r%d\n",
10045 ia64_dbx_register_number (dest_regno
));
10048 case AR_UNAT_REGNUM
:
10049 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_unat
]);
10051 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
10052 ia64_dbx_register_number (dest_regno
));
10056 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_lc
]);
10058 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
10059 ia64_dbx_register_number (dest_regno
));
10063 /* Everything else should indicate being stored to memory. */
10064 gcc_unreachable ();
10068 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10071 process_cfa_offset (FILE *asm_out_file
, rtx pat
, bool unwind
)
10073 rtx dest
= SET_DEST (pat
);
10074 rtx src
= SET_SRC (pat
);
10075 int src_regno
= REGNO (src
);
10076 const char *saveop
;
10080 gcc_assert (MEM_P (dest
));
10081 if (GET_CODE (XEXP (dest
, 0)) == REG
)
10083 base
= XEXP (dest
, 0);
10088 gcc_assert (GET_CODE (XEXP (dest
, 0)) == PLUS
10089 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
);
10090 base
= XEXP (XEXP (dest
, 0), 0);
10091 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
10094 if (base
== hard_frame_pointer_rtx
)
10096 saveop
= ".savepsp";
10101 gcc_assert (base
== stack_pointer_rtx
);
10102 saveop
= ".savesp";
10105 src_regno
= REGNO (src
);
10109 gcc_assert (!current_frame_info
.r
[reg_save_b0
]);
10111 fprintf (asm_out_file
, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC
"\n",
10116 gcc_assert (!current_frame_info
.r
[reg_save_pr
]);
10118 fprintf (asm_out_file
, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC
"\n",
10123 gcc_assert (!current_frame_info
.r
[reg_save_ar_lc
]);
10125 fprintf (asm_out_file
, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC
"\n",
10129 case AR_PFS_REGNUM
:
10130 gcc_assert (!current_frame_info
.r
[reg_save_ar_pfs
]);
10132 fprintf (asm_out_file
, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC
"\n",
10136 case AR_UNAT_REGNUM
:
10137 gcc_assert (!current_frame_info
.r
[reg_save_ar_unat
]);
10139 fprintf (asm_out_file
, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC
"\n",
10148 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
10149 1 << (src_regno
- GR_REG (4)));
10158 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
10159 1 << (src_regno
- BR_REG (1)));
10167 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
10168 1 << (src_regno
- FR_REG (2)));
10171 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10172 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10173 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10174 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10176 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
10177 1 << (src_regno
- FR_REG (12)));
10181 /* ??? For some reason we mark other general registers, even those
10182 we can't represent in the unwind info. Ignore them. */
10187 /* This function looks at a single insn and emits any directives
10188 required to unwind this insn. */
10191 ia64_asm_unwind_emit (FILE *asm_out_file
, rtx_insn
*insn
)
10193 bool unwind
= ia64_except_unwind_info (&global_options
) == UI_TARGET
;
10194 bool frame
= dwarf2out_do_frame ();
10198 if (!unwind
&& !frame
)
10201 if (NOTE_INSN_BASIC_BLOCK_P (insn
))
10203 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
10204 == EXIT_BLOCK_PTR_FOR_FN (cfun
);
10206 /* Restore unwind state from immediately before the epilogue. */
10207 if (need_copy_state
)
10211 fprintf (asm_out_file
, "\t.body\n");
10212 fprintf (asm_out_file
, "\t.copy_state %d\n",
10213 cfun
->machine
->state_num
);
10215 need_copy_state
= false;
10219 if (NOTE_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
10222 /* Look for the ALLOC insn. */
10223 if (INSN_CODE (insn
) == CODE_FOR_alloc
)
10225 rtx dest
= SET_DEST (XVECEXP (PATTERN (insn
), 0, 0));
10226 int dest_regno
= REGNO (dest
);
10228 /* If this is the final destination for ar.pfs, then this must
10229 be the alloc in the prologue. */
10230 if (dest_regno
== current_frame_info
.r
[reg_save_ar_pfs
])
10233 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
10234 ia64_dbx_register_number (dest_regno
));
10238 /* This must be an alloc before a sibcall. We must drop the
10239 old frame info. The easiest way to drop the old frame
10240 info is to ensure we had a ".restore sp" directive
10241 followed by a new prologue. If the procedure doesn't
10242 have a memory-stack frame, we'll issue a dummy ".restore
10244 if (current_frame_info
.total_size
== 0 && !frame_pointer_needed
)
10245 /* if haven't done process_epilogue() yet, do it now */
10246 process_epilogue (asm_out_file
, insn
, unwind
, frame
);
10248 fprintf (asm_out_file
, "\t.prologue\n");
10253 handled_one
= false;
10254 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
10255 switch (REG_NOTE_KIND (note
))
10257 case REG_CFA_ADJUST_CFA
:
10258 pat
= XEXP (note
, 0);
10260 pat
= PATTERN (insn
);
10261 process_cfa_adjust_cfa (asm_out_file
, pat
, insn
, unwind
, frame
);
10262 handled_one
= true;
10265 case REG_CFA_OFFSET
:
10266 pat
= XEXP (note
, 0);
10268 pat
= PATTERN (insn
);
10269 process_cfa_offset (asm_out_file
, pat
, unwind
);
10270 handled_one
= true;
10273 case REG_CFA_REGISTER
:
10274 pat
= XEXP (note
, 0);
10276 pat
= PATTERN (insn
);
10277 process_cfa_register (asm_out_file
, pat
, unwind
);
10278 handled_one
= true;
10281 case REG_FRAME_RELATED_EXPR
:
10282 case REG_CFA_DEF_CFA
:
10283 case REG_CFA_EXPRESSION
:
10284 case REG_CFA_RESTORE
:
10285 case REG_CFA_SET_VDRAP
:
10286 /* Not used in the ia64 port. */
10287 gcc_unreachable ();
10290 /* Not a frame-related note. */
10294 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10295 explicit action to take. No guessing required. */
10296 gcc_assert (handled_one
);
10299 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10302 ia64_asm_emit_except_personality (rtx personality
)
10304 fputs ("\t.personality\t", asm_out_file
);
10305 output_addr_const (asm_out_file
, personality
);
10306 fputc ('\n', asm_out_file
);
10309 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10312 ia64_asm_init_sections (void)
10314 exception_section
= get_unnamed_section (0, output_section_asm_op
,
10318 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10320 static enum unwind_info_type
10321 ia64_debug_unwind_info (void)
10329 IA64_BUILTIN_COPYSIGNQ
,
10330 IA64_BUILTIN_FABSQ
,
10331 IA64_BUILTIN_FLUSHRS
,
10333 IA64_BUILTIN_HUGE_VALQ
,
10335 IA64_BUILTIN_NANSQ
,
10339 static GTY(()) tree ia64_builtins
[(int) IA64_BUILTIN_max
];
10342 ia64_init_builtins (void)
10348 /* The __fpreg type. */
10349 fpreg_type
= make_node (REAL_TYPE
);
10350 TYPE_PRECISION (fpreg_type
) = 82;
10351 layout_type (fpreg_type
);
10352 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
10354 /* The __float80 type. */
10355 if (float64x_type_node
!= NULL_TREE
10356 && TYPE_MODE (float64x_type_node
) == XFmode
)
10357 float80_type
= float64x_type_node
;
10360 float80_type
= make_node (REAL_TYPE
);
10361 TYPE_PRECISION (float80_type
) = 80;
10362 layout_type (float80_type
);
10364 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
10366 /* The __float128 type. */
10370 tree const_string_type
10371 = build_pointer_type (build_qualified_type
10372 (char_type_node
, TYPE_QUAL_CONST
));
10374 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
10377 /* TFmode support builtins. */
10378 ftype
= build_function_type_list (float128_type_node
, NULL_TREE
);
10379 decl
= add_builtin_function ("__builtin_infq", ftype
,
10380 IA64_BUILTIN_INFQ
, BUILT_IN_MD
,
10382 ia64_builtins
[IA64_BUILTIN_INFQ
] = decl
;
10384 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
10385 IA64_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
10387 ia64_builtins
[IA64_BUILTIN_HUGE_VALQ
] = decl
;
10389 ftype
= build_function_type_list (float128_type_node
,
10392 decl
= add_builtin_function ("__builtin_nanq", ftype
,
10393 IA64_BUILTIN_NANQ
, BUILT_IN_MD
,
10394 "nanq", NULL_TREE
);
10395 TREE_READONLY (decl
) = 1;
10396 ia64_builtins
[IA64_BUILTIN_NANQ
] = decl
;
10398 decl
= add_builtin_function ("__builtin_nansq", ftype
,
10399 IA64_BUILTIN_NANSQ
, BUILT_IN_MD
,
10400 "nansq", NULL_TREE
);
10401 TREE_READONLY (decl
) = 1;
10402 ia64_builtins
[IA64_BUILTIN_NANSQ
] = decl
;
10404 ftype
= build_function_type_list (float128_type_node
,
10405 float128_type_node
,
10407 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
10408 IA64_BUILTIN_FABSQ
, BUILT_IN_MD
,
10409 "__fabstf2", NULL_TREE
);
10410 TREE_READONLY (decl
) = 1;
10411 ia64_builtins
[IA64_BUILTIN_FABSQ
] = decl
;
10413 ftype
= build_function_type_list (float128_type_node
,
10414 float128_type_node
,
10415 float128_type_node
,
10417 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
10418 IA64_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
10419 "__copysigntf3", NULL_TREE
);
10420 TREE_READONLY (decl
) = 1;
10421 ia64_builtins
[IA64_BUILTIN_COPYSIGNQ
] = decl
;
10424 /* Under HPUX, this is a synonym for "long double". */
10425 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
10428 /* Fwrite on VMS is non-standard. */
10429 #if TARGET_ABI_OPEN_VMS
10430 vms_patch_builtins ();
10433 #define def_builtin(name, type, code) \
10434 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10437 decl
= def_builtin ("__builtin_ia64_bsp",
10438 build_function_type_list (ptr_type_node
, NULL_TREE
),
10440 ia64_builtins
[IA64_BUILTIN_BSP
] = decl
;
10442 decl
= def_builtin ("__builtin_ia64_flushrs",
10443 build_function_type_list (void_type_node
, NULL_TREE
),
10444 IA64_BUILTIN_FLUSHRS
);
10445 ia64_builtins
[IA64_BUILTIN_FLUSHRS
] = decl
;
10451 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
10452 set_user_assembler_name (decl
, "_Isfinite");
10453 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
10454 set_user_assembler_name (decl
, "_Isfinitef");
10455 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEL
)) != NULL_TREE
)
10456 set_user_assembler_name (decl
, "_Isfinitef128");
10461 ia64_fold_builtin (tree fndecl
, int n_args ATTRIBUTE_UNUSED
,
10462 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
10464 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
10466 enum ia64_builtins fn_code
= (enum ia64_builtins
)
10467 DECL_FUNCTION_CODE (fndecl
);
10470 case IA64_BUILTIN_NANQ
:
10471 case IA64_BUILTIN_NANSQ
:
10473 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
10474 const char *str
= c_getstr (*args
);
10475 int quiet
= fn_code
== IA64_BUILTIN_NANQ
;
10476 REAL_VALUE_TYPE real
;
10478 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
10479 return build_real (type
, real
);
10488 #ifdef SUBTARGET_FOLD_BUILTIN
10489 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
10496 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
10497 machine_mode mode ATTRIBUTE_UNUSED
,
10498 int ignore ATTRIBUTE_UNUSED
)
10500 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
10501 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
10505 case IA64_BUILTIN_BSP
:
10506 if (! target
|| ! register_operand (target
, DImode
))
10507 target
= gen_reg_rtx (DImode
);
10508 emit_insn (gen_bsp_value (target
));
10509 #ifdef POINTERS_EXTEND_UNSIGNED
10510 target
= convert_memory_address (ptr_mode
, target
);
10514 case IA64_BUILTIN_FLUSHRS
:
10515 emit_insn (gen_flushrs ());
10518 case IA64_BUILTIN_INFQ
:
10519 case IA64_BUILTIN_HUGE_VALQ
:
10521 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
10522 REAL_VALUE_TYPE inf
;
10526 tmp
= const_double_from_real_value (inf
, target_mode
);
10528 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
10531 target
= gen_reg_rtx (target_mode
);
10533 emit_move_insn (target
, tmp
);
10537 case IA64_BUILTIN_NANQ
:
10538 case IA64_BUILTIN_NANSQ
:
10539 case IA64_BUILTIN_FABSQ
:
10540 case IA64_BUILTIN_COPYSIGNQ
:
10541 return expand_call (exp
, target
, ignore
);
10544 gcc_unreachable ();
10550 /* Return the ia64 builtin for CODE. */
10553 ia64_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
10555 if (code
>= IA64_BUILTIN_max
)
10556 return error_mark_node
;
10558 return ia64_builtins
[code
];
10561 /* For the HP-UX IA64 aggregate parameters are passed stored in the
10562 most significant bits of the stack slot. */
10565 ia64_hpux_function_arg_padding (machine_mode mode
, const_tree type
)
10567 /* Exception to normal case for structures/unions/etc. */
10569 if (type
&& AGGREGATE_TYPE_P (type
)
10570 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
10573 /* Fall back to the default. */
10574 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
10577 /* Emit text to declare externally defined variables and functions, because
10578 the Intel assembler does not support undefined externals. */
10581 ia64_asm_output_external (FILE *file
, tree decl
, const char *name
)
10583 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10584 set in order to avoid putting out names that are never really
10586 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)))
10588 /* maybe_assemble_visibility will return 1 if the assembler
10589 visibility directive is output. */
10590 int need_visibility
= ((*targetm
.binds_local_p
) (decl
)
10591 && maybe_assemble_visibility (decl
));
10593 /* GNU as does not need anything here, but the HP linker does
10594 need something for external functions. */
10595 if ((TARGET_HPUX_LD
|| !TARGET_GNU_AS
)
10596 && TREE_CODE (decl
) == FUNCTION_DECL
)
10597 (*targetm
.asm_out
.globalize_decl_name
) (file
, decl
);
10598 else if (need_visibility
&& !TARGET_GNU_AS
)
10599 (*targetm
.asm_out
.globalize_label
) (file
, name
);
10603 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
10604 modes of word_mode and larger. Rename the TFmode libfuncs using the
10605 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10606 backward compatibility. */
10609 ia64_init_libfuncs (void)
10611 set_optab_libfunc (sdiv_optab
, SImode
, "__divsi3");
10612 set_optab_libfunc (udiv_optab
, SImode
, "__udivsi3");
10613 set_optab_libfunc (smod_optab
, SImode
, "__modsi3");
10614 set_optab_libfunc (umod_optab
, SImode
, "__umodsi3");
10616 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
10617 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
10618 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
10619 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
10620 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
10622 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
10623 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
10624 set_conv_libfunc (sext_optab
, TFmode
, XFmode
, "_U_Qfcnvff_f80_to_quad");
10625 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
10626 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
10627 set_conv_libfunc (trunc_optab
, XFmode
, TFmode
, "_U_Qfcnvff_quad_to_f80");
10629 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_sgl");
10630 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
10631 set_conv_libfunc (sfix_optab
, TImode
, TFmode
, "_U_Qfcnvfxt_quad_to_quad");
10632 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxut_quad_to_sgl");
10633 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxut_quad_to_dbl");
10635 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
10636 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
10637 set_conv_libfunc (sfloat_optab
, TFmode
, TImode
, "_U_Qfcnvxf_quad_to_quad");
10638 /* HP-UX 11.23 libc does not have a function for unsigned
10639 SImode-to-TFmode conversion. */
10640 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
, "_U_Qfcnvxuf_dbl_to_quad");
10643 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10646 ia64_hpux_init_libfuncs (void)
10648 ia64_init_libfuncs ();
10650 /* The HP SI millicode division and mod functions expect DI arguments.
10651 By turning them off completely we avoid using both libgcc and the
10652 non-standard millicode routines and use the HP DI millicode routines
10655 set_optab_libfunc (sdiv_optab
, SImode
, 0);
10656 set_optab_libfunc (udiv_optab
, SImode
, 0);
10657 set_optab_libfunc (smod_optab
, SImode
, 0);
10658 set_optab_libfunc (umod_optab
, SImode
, 0);
10660 set_optab_libfunc (sdiv_optab
, DImode
, "__milli_divI");
10661 set_optab_libfunc (udiv_optab
, DImode
, "__milli_divU");
10662 set_optab_libfunc (smod_optab
, DImode
, "__milli_remI");
10663 set_optab_libfunc (umod_optab
, DImode
, "__milli_remU");
10665 /* HP-UX libc has TF min/max/abs routines in it. */
10666 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qfmin");
10667 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
10668 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
10670 /* ia64_expand_compare uses this. */
10671 cmptf_libfunc
= init_one_libfunc ("_U_Qfcmp");
10673 /* These should never be used. */
10674 set_optab_libfunc (eq_optab
, TFmode
, 0);
10675 set_optab_libfunc (ne_optab
, TFmode
, 0);
10676 set_optab_libfunc (gt_optab
, TFmode
, 0);
10677 set_optab_libfunc (ge_optab
, TFmode
, 0);
10678 set_optab_libfunc (lt_optab
, TFmode
, 0);
10679 set_optab_libfunc (le_optab
, TFmode
, 0);
10682 /* Rename the division and modulus functions in VMS. */
10685 ia64_vms_init_libfuncs (void)
10687 set_optab_libfunc (sdiv_optab
, SImode
, "OTS$DIV_I");
10688 set_optab_libfunc (sdiv_optab
, DImode
, "OTS$DIV_L");
10689 set_optab_libfunc (udiv_optab
, SImode
, "OTS$DIV_UI");
10690 set_optab_libfunc (udiv_optab
, DImode
, "OTS$DIV_UL");
10691 set_optab_libfunc (smod_optab
, SImode
, "OTS$REM_I");
10692 set_optab_libfunc (smod_optab
, DImode
, "OTS$REM_L");
10693 set_optab_libfunc (umod_optab
, SImode
, "OTS$REM_UI");
10694 set_optab_libfunc (umod_optab
, DImode
, "OTS$REM_UL");
10695 #ifdef MEM_LIBFUNCS_INIT
10700 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10701 the HPUX conventions. */
10704 ia64_sysv4_init_libfuncs (void)
10706 ia64_init_libfuncs ();
10708 /* These functions are not part of the HPUX TFmode interface. We
10709 use them instead of _U_Qfcmp, which doesn't work the way we
10711 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
10712 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
10713 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
10714 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
10715 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
10716 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
10718 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10719 glibc doesn't have them. */
10725 ia64_soft_fp_init_libfuncs (void)
10730 ia64_vms_valid_pointer_mode (machine_mode mode
)
10732 return (mode
== SImode
|| mode
== DImode
);
10735 /* For HPUX, it is illegal to have relocations in shared segments. */
10738 ia64_hpux_reloc_rw_mask (void)
10743 /* For others, relax this so that relocations to local data goes in
10744 read-only segments, but we still cannot allow global relocations
10745 in read-only segments. */
10748 ia64_reloc_rw_mask (void)
10750 return flag_pic
? 3 : 2;
10753 /* Return the section to use for X. The only special thing we do here
10754 is to honor small data. */
10757 ia64_select_rtx_section (machine_mode mode
, rtx x
,
10758 unsigned HOST_WIDE_INT align
)
10760 if (GET_MODE_SIZE (mode
) > 0
10761 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
10762 && !TARGET_NO_SDATA
)
10763 return sdata_section
;
10765 return default_elf_select_rtx_section (mode
, x
, align
);
10768 static unsigned int
10769 ia64_section_type_flags (tree decl
, const char *name
, int reloc
)
10771 unsigned int flags
= 0;
10773 if (strcmp (name
, ".sdata") == 0
10774 || strncmp (name
, ".sdata.", 7) == 0
10775 || strncmp (name
, ".gnu.linkonce.s.", 16) == 0
10776 || strncmp (name
, ".sdata2.", 8) == 0
10777 || strncmp (name
, ".gnu.linkonce.s2.", 17) == 0
10778 || strcmp (name
, ".sbss") == 0
10779 || strncmp (name
, ".sbss.", 6) == 0
10780 || strncmp (name
, ".gnu.linkonce.sb.", 17) == 0)
10781 flags
= SECTION_SMALL
;
10783 flags
|= default_section_type_flags (decl
, name
, reloc
);
10787 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10788 structure type and that the address of that type should be passed
10789 in out0, rather than in r8. */
10792 ia64_struct_retval_addr_is_first_parm_p (tree fntype
)
10794 tree ret_type
= TREE_TYPE (fntype
);
10796 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10797 as the structure return address parameter, if the return value
10798 type has a non-trivial copy constructor or destructor. It is not
10799 clear if this same convention should be used for other
10800 programming languages. Until G++ 3.4, we incorrectly used r8 for
10801 these return values. */
10802 return (abi_version_at_least (2)
10804 && TYPE_MODE (ret_type
) == BLKmode
10805 && TREE_ADDRESSABLE (ret_type
)
10806 && lang_GNU_CXX ());
10809 /* Output the assembler code for a thunk function. THUNK_DECL is the
10810 declaration for the thunk function itself, FUNCTION is the decl for
10811 the target function. DELTA is an immediate constant offset to be
10812 added to THIS. If VCALL_OFFSET is nonzero, the word at
10813 *(*this + vcall_offset) should be added to THIS. */
10816 ia64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
10817 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
10820 rtx this_rtx
, funexp
;
10822 unsigned int this_parmno
;
10823 unsigned int this_regno
;
10826 reload_completed
= 1;
10827 epilogue_completed
= 1;
10829 /* Set things up as ia64_expand_prologue might. */
10830 last_scratch_gr_reg
= 15;
10832 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
10833 current_frame_info
.spill_cfa_off
= -16;
10834 current_frame_info
.n_input_regs
= 1;
10835 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
10837 /* Mark the end of the (empty) prologue. */
10838 emit_note (NOTE_INSN_PROLOGUE_END
);
10840 /* Figure out whether "this" will be the first parameter (the
10841 typical case) or the second parameter (as happens when the
10842 virtual function returns certain class objects). */
10844 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk
))
10846 this_regno
= IN_REG (this_parmno
);
10847 if (!TARGET_REG_NAMES
)
10848 reg_names
[this_regno
] = ia64_reg_numbers
[this_parmno
];
10850 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
10852 /* Apply the constant offset, if required. */
10853 delta_rtx
= GEN_INT (delta
);
10856 rtx tmp
= gen_rtx_REG (ptr_mode
, this_regno
);
10857 REG_POINTER (tmp
) = 1;
10858 if (delta
&& satisfies_constraint_I (delta_rtx
))
10860 emit_insn (gen_ptr_extend_plus_imm (this_rtx
, tmp
, delta_rtx
));
10864 emit_insn (gen_ptr_extend (this_rtx
, tmp
));
10868 if (!satisfies_constraint_I (delta_rtx
))
10870 rtx tmp
= gen_rtx_REG (Pmode
, 2);
10871 emit_move_insn (tmp
, delta_rtx
);
10874 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, delta_rtx
));
10877 /* Apply the offset from the vtable, if required. */
10880 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
10881 rtx tmp
= gen_rtx_REG (Pmode
, 2);
10885 rtx t
= gen_rtx_REG (ptr_mode
, 2);
10886 REG_POINTER (t
) = 1;
10887 emit_move_insn (t
, gen_rtx_MEM (ptr_mode
, this_rtx
));
10888 if (satisfies_constraint_I (vcall_offset_rtx
))
10890 emit_insn (gen_ptr_extend_plus_imm (tmp
, t
, vcall_offset_rtx
));
10894 emit_insn (gen_ptr_extend (tmp
, t
));
10897 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
10901 if (!satisfies_constraint_J (vcall_offset_rtx
))
10903 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
10904 emit_move_insn (tmp2
, vcall_offset_rtx
);
10905 vcall_offset_rtx
= tmp2
;
10907 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
10911 emit_insn (gen_zero_extendsidi2 (tmp
, gen_rtx_MEM (ptr_mode
, tmp
)));
10913 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
10915 emit_insn (gen_adddi3 (this_rtx
, this_rtx
, tmp
));
10918 /* Generate a tail call to the target function. */
10919 if (! TREE_USED (function
))
10921 assemble_external (function
);
10922 TREE_USED (function
) = 1;
10924 funexp
= XEXP (DECL_RTL (function
), 0);
10925 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
10926 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
10927 insn
= get_last_insn ();
10928 SIBLING_CALL_P (insn
) = 1;
10930 /* Code generation for calls relies on splitting. */
10931 reload_completed
= 1;
10932 epilogue_completed
= 1;
10933 try_split (PATTERN (insn
), insn
, 0);
10937 /* Run just enough of rest_of_compilation to get the insns emitted.
10938 There's not really enough bulk here to make other passes such as
10939 instruction scheduling worth while. Note that use_thunk calls
10940 assemble_start_function and assemble_end_function. */
10942 emit_all_insn_group_barriers (NULL
);
10943 insn
= get_insns ();
10944 shorten_branches (insn
);
10945 final_start_function (insn
, file
, 1);
10946 final (insn
, file
, 1);
10947 final_end_function ();
10949 reload_completed
= 0;
10950 epilogue_completed
= 0;
10953 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
10956 ia64_struct_value_rtx (tree fntype
,
10957 int incoming ATTRIBUTE_UNUSED
)
10959 if (TARGET_ABI_OPEN_VMS
||
10960 (fntype
&& ia64_struct_retval_addr_is_first_parm_p (fntype
)))
10962 return gen_rtx_REG (Pmode
, GR_REG (8));
10966 ia64_scalar_mode_supported_p (machine_mode mode
)
10992 ia64_vector_mode_supported_p (machine_mode mode
)
11009 /* Implement the FUNCTION_PROFILER macro. */
11012 ia64_output_function_profiler (FILE *file
, int labelno
)
11014 bool indirect_call
;
11016 /* If the function needs a static chain and the static chain
11017 register is r15, we use an indirect call so as to bypass
11018 the PLT stub in case the executable is dynamically linked,
11019 because the stub clobbers r15 as per 5.3.6 of the psABI.
11020 We don't need to do that in non canonical PIC mode. */
11022 if (cfun
->static_chain_decl
&& !TARGET_NO_PIC
&& !TARGET_AUTO_PIC
)
11024 gcc_assert (STATIC_CHAIN_REGNUM
== 15);
11025 indirect_call
= true;
11028 indirect_call
= false;
11031 fputs ("\t.prologue 4, r40\n", file
);
11033 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file
);
11034 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file
);
11036 if (NO_PROFILE_COUNTERS
)
11037 fputs ("\tmov out3 = r0\n", file
);
11041 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
11043 if (TARGET_AUTO_PIC
)
11044 fputs ("\tmovl out3 = @gprel(", file
);
11046 fputs ("\taddl out3 = @ltoff(", file
);
11047 assemble_name (file
, buf
);
11048 if (TARGET_AUTO_PIC
)
11049 fputs (")\n", file
);
11051 fputs ("), r1\n", file
);
11055 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file
);
11056 fputs ("\t;;\n", file
);
11058 fputs ("\t.save rp, r42\n", file
);
11059 fputs ("\tmov out2 = b0\n", file
);
11061 fputs ("\tld8 r14 = [r14]\n\t;;\n", file
);
11062 fputs ("\t.body\n", file
);
11063 fputs ("\tmov out1 = r1\n", file
);
11066 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file
);
11067 fputs ("\tmov b6 = r16\n", file
);
11068 fputs ("\tld8 r1 = [r14]\n", file
);
11069 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file
);
11072 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file
);
11075 static GTY(()) rtx mcount_func_rtx
;
11077 gen_mcount_func_rtx (void)
11079 if (!mcount_func_rtx
)
11080 mcount_func_rtx
= init_one_libfunc ("_mcount");
11081 return mcount_func_rtx
;
11085 ia64_profile_hook (int labelno
)
11089 if (NO_PROFILE_COUNTERS
)
11090 label
= const0_rtx
;
11094 const char *label_name
;
11095 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
11096 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
11097 label
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
11098 SYMBOL_REF_FLAGS (label
) = SYMBOL_FLAG_LOCAL
;
11100 ip
= gen_reg_rtx (Pmode
);
11101 emit_insn (gen_ip_value (ip
));
11102 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL
,
11104 gen_rtx_REG (Pmode
, BR_REG (0)), Pmode
,
11109 /* Return the mangling of TYPE if it is an extended fundamental type. */
11111 static const char *
11112 ia64_mangle_type (const_tree type
)
11114 type
= TYPE_MAIN_VARIANT (type
);
11116 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
11117 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
11120 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11122 if (!TARGET_HPUX
&& TYPE_MODE (type
) == TFmode
)
11124 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11125 an extended mangling. Elsewhere, "e" is available since long
11126 double is 80 bits. */
11127 if (TYPE_MODE (type
) == XFmode
)
11128 return TARGET_HPUX
? "u9__float80" : "e";
11129 if (TYPE_MODE (type
) == RFmode
)
11130 return "u7__fpreg";
11134 /* Return the diagnostic message string if conversion from FROMTYPE to
11135 TOTYPE is not allowed, NULL otherwise. */
11136 static const char *
11137 ia64_invalid_conversion (const_tree fromtype
, const_tree totype
)
11139 /* Reject nontrivial conversion to or from __fpreg. */
11140 if (TYPE_MODE (fromtype
) == RFmode
11141 && TYPE_MODE (totype
) != RFmode
11142 && TYPE_MODE (totype
) != VOIDmode
)
11143 return N_("invalid conversion from %<__fpreg%>");
11144 if (TYPE_MODE (totype
) == RFmode
11145 && TYPE_MODE (fromtype
) != RFmode
)
11146 return N_("invalid conversion to %<__fpreg%>");
11150 /* Return the diagnostic message string if the unary operation OP is
11151 not permitted on TYPE, NULL otherwise. */
11152 static const char *
11153 ia64_invalid_unary_op (int op
, const_tree type
)
11155 /* Reject operations on __fpreg other than unary + or &. */
11156 if (TYPE_MODE (type
) == RFmode
11157 && op
!= CONVERT_EXPR
11158 && op
!= ADDR_EXPR
)
11159 return N_("invalid operation on %<__fpreg%>");
11163 /* Return the diagnostic message string if the binary operation OP is
11164 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11165 static const char *
11166 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
, const_tree type2
)
11168 /* Reject operations on __fpreg. */
11169 if (TYPE_MODE (type1
) == RFmode
|| TYPE_MODE (type2
) == RFmode
)
11170 return N_("invalid operation on %<__fpreg%>");
11174 /* HP-UX version_id attribute.
11175 For object foo, if the version_id is set to 1234 put out an alias
11176 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
11177 other than an alias statement because it is an illegal symbol name. */
11180 ia64_handle_version_id_attribute (tree
*node ATTRIBUTE_UNUSED
,
11181 tree name ATTRIBUTE_UNUSED
,
11183 int flags ATTRIBUTE_UNUSED
,
11184 bool *no_add_attrs
)
11186 tree arg
= TREE_VALUE (args
);
11188 if (TREE_CODE (arg
) != STRING_CST
)
11190 error("version attribute is not a string");
11191 *no_add_attrs
= true;
11197 /* Target hook for c_mode_for_suffix. */
11199 static machine_mode
11200 ia64_c_mode_for_suffix (char suffix
)
11210 static GTY(()) rtx ia64_dconst_0_5_rtx
;
11213 ia64_dconst_0_5 (void)
11215 if (! ia64_dconst_0_5_rtx
)
11217 REAL_VALUE_TYPE rv
;
11218 real_from_string (&rv
, "0.5");
11219 ia64_dconst_0_5_rtx
= const_double_from_real_value (rv
, DFmode
);
11221 return ia64_dconst_0_5_rtx
;
11224 static GTY(()) rtx ia64_dconst_0_375_rtx
;
11227 ia64_dconst_0_375 (void)
11229 if (! ia64_dconst_0_375_rtx
)
11231 REAL_VALUE_TYPE rv
;
11232 real_from_string (&rv
, "0.375");
11233 ia64_dconst_0_375_rtx
= const_double_from_real_value (rv
, DFmode
);
11235 return ia64_dconst_0_375_rtx
;
11238 static machine_mode
11239 ia64_get_reg_raw_mode (int regno
)
11241 if (FR_REGNO_P (regno
))
11243 return default_get_reg_raw_mode(regno
);
11246 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11250 ia64_member_type_forces_blk (const_tree
, machine_mode mode
)
11252 return TARGET_HPUX
&& mode
== TFmode
;
11255 /* Always default to .text section until HP-UX linker is fixed. */
11257 ATTRIBUTE_UNUSED
static section
*
11258 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED
,
11259 enum node_frequency freq ATTRIBUTE_UNUSED
,
11260 bool startup ATTRIBUTE_UNUSED
,
11261 bool exit ATTRIBUTE_UNUSED
)
11266 /* Construct (set target (vec_select op0 (parallel perm))) and
11267 return true if that's a valid instruction in the active ISA. */
11270 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
, unsigned nelt
)
11272 rtx rperm
[MAX_VECT_LEN
], x
;
11275 for (i
= 0; i
< nelt
; ++i
)
11276 rperm
[i
] = GEN_INT (perm
[i
]);
11278 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nelt
, rperm
));
11279 x
= gen_rtx_VEC_SELECT (GET_MODE (target
), op0
, x
);
11280 x
= gen_rtx_SET (target
, x
);
11282 rtx_insn
*insn
= emit_insn (x
);
11283 if (recog_memoized (insn
) < 0)
11285 remove_insn (insn
);
11291 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11294 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
11295 const unsigned char *perm
, unsigned nelt
)
11297 machine_mode v2mode
;
11300 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
11301 x
= gen_rtx_VEC_CONCAT (v2mode
, op0
, op1
);
11302 return expand_vselect (target
, x
, perm
, nelt
);
11305 /* Try to expand a no-op permutation. */
11308 expand_vec_perm_identity (struct expand_vec_perm_d
*d
)
11310 unsigned i
, nelt
= d
->nelt
;
11312 for (i
= 0; i
< nelt
; ++i
)
11313 if (d
->perm
[i
] != i
)
11317 emit_move_insn (d
->target
, d
->op0
);
11322 /* Try to expand D via a shrp instruction. */
11325 expand_vec_perm_shrp (struct expand_vec_perm_d
*d
)
11327 unsigned i
, nelt
= d
->nelt
, shift
, mask
;
11330 /* ??? Don't force V2SFmode into the integer registers. */
11331 if (d
->vmode
== V2SFmode
)
11334 mask
= (d
->one_operand_p
? nelt
- 1 : 2 * nelt
- 1);
11336 shift
= d
->perm
[0];
11337 if (BYTES_BIG_ENDIAN
&& shift
> nelt
)
11340 for (i
= 1; i
< nelt
; ++i
)
11341 if (d
->perm
[i
] != ((shift
+ i
) & mask
))
11347 hi
= shift
< nelt
? d
->op1
: d
->op0
;
11348 lo
= shift
< nelt
? d
->op0
: d
->op1
;
11352 shift
*= GET_MODE_UNIT_SIZE (d
->vmode
) * BITS_PER_UNIT
;
11354 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11355 gcc_assert (IN_RANGE (shift
, 1, 63));
11357 /* Recall that big-endian elements are numbered starting at the top of
11358 the register. Ideally we'd have a shift-left-pair. But since we
11359 don't, convert to a shift the other direction. */
11360 if (BYTES_BIG_ENDIAN
)
11361 shift
= 64 - shift
;
11363 tmp
= gen_reg_rtx (DImode
);
11364 hi
= gen_lowpart (DImode
, hi
);
11365 lo
= gen_lowpart (DImode
, lo
);
11366 emit_insn (gen_shrp (tmp
, hi
, lo
, GEN_INT (shift
)));
11368 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, tmp
));
11372 /* Try to instantiate D in a single instruction. */
11375 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
11377 unsigned i
, nelt
= d
->nelt
;
11378 unsigned char perm2
[MAX_VECT_LEN
];
11380 /* Try single-operand selections. */
11381 if (d
->one_operand_p
)
11383 if (expand_vec_perm_identity (d
))
11385 if (expand_vselect (d
->target
, d
->op0
, d
->perm
, nelt
))
11389 /* Try two operand selections. */
11390 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
))
11393 /* Recognize interleave style patterns with reversed operands. */
11394 if (!d
->one_operand_p
)
11396 for (i
= 0; i
< nelt
; ++i
)
11398 unsigned e
= d
->perm
[i
];
11406 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
))
11410 if (expand_vec_perm_shrp (d
))
11413 /* ??? Look for deposit-like permutations where most of the result
11414 comes from one vector unchanged and the rest comes from a
11415 sequential hunk of the other vector. */
11420 /* Pattern match broadcast permutations. */
11423 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
11425 unsigned i
, elt
, nelt
= d
->nelt
;
11426 unsigned char perm2
[2];
11430 if (!d
->one_operand_p
)
11434 for (i
= 1; i
< nelt
; ++i
)
11435 if (d
->perm
[i
] != elt
)
11442 /* Implementable by interleave. */
11444 perm2
[1] = elt
+ 2;
11445 ok
= expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, 2);
11450 /* Implementable by extract + broadcast. */
11451 if (BYTES_BIG_ENDIAN
)
11453 elt
*= BITS_PER_UNIT
;
11454 temp
= gen_reg_rtx (DImode
);
11455 emit_insn (gen_extzv (temp
, gen_lowpart (DImode
, d
->op0
),
11456 GEN_INT (8), GEN_INT (elt
)));
11457 emit_insn (gen_mux1_brcst_qi (d
->target
, gen_lowpart (QImode
, temp
)));
11461 /* Should have been matched directly by vec_select. */
11463 gcc_unreachable ();
11469 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11470 two vector permutation into a single vector permutation by using
11471 an interleave operation to merge the vectors. */
11474 expand_vec_perm_interleave_2 (struct expand_vec_perm_d
*d
)
11476 struct expand_vec_perm_d dremap
, dfinal
;
11477 unsigned char remap
[2 * MAX_VECT_LEN
];
11478 unsigned contents
, i
, nelt
, nelt2
;
11479 unsigned h0
, h1
, h2
, h3
;
11483 if (d
->one_operand_p
)
11489 /* Examine from whence the elements come. */
11491 for (i
= 0; i
< nelt
; ++i
)
11492 contents
|= 1u << d
->perm
[i
];
11494 memset (remap
, 0xff, sizeof (remap
));
11497 h0
= (1u << nelt2
) - 1;
11500 h3
= h0
<< (nelt
+ nelt2
);
11502 if ((contents
& (h0
| h2
)) == contents
) /* punpck even halves */
11504 for (i
= 0; i
< nelt
; ++i
)
11506 unsigned which
= i
/ 2 + (i
& 1 ? nelt
: 0);
11508 dremap
.perm
[i
] = which
;
11511 else if ((contents
& (h1
| h3
)) == contents
) /* punpck odd halves */
11513 for (i
= 0; i
< nelt
; ++i
)
11515 unsigned which
= i
/ 2 + nelt2
+ (i
& 1 ? nelt
: 0);
11517 dremap
.perm
[i
] = which
;
11520 else if ((contents
& 0x5555) == contents
) /* mix even elements */
11522 for (i
= 0; i
< nelt
; ++i
)
11524 unsigned which
= (i
& ~1) + (i
& 1 ? nelt
: 0);
11526 dremap
.perm
[i
] = which
;
11529 else if ((contents
& 0xaaaa) == contents
) /* mix odd elements */
11531 for (i
= 0; i
< nelt
; ++i
)
11533 unsigned which
= (i
| 1) + (i
& 1 ? nelt
: 0);
11535 dremap
.perm
[i
] = which
;
11538 else if (floor_log2 (contents
) - ctz_hwi (contents
) < (int)nelt
) /* shrp */
11540 unsigned shift
= ctz_hwi (contents
);
11541 for (i
= 0; i
< nelt
; ++i
)
11543 unsigned which
= (i
+ shift
) & (2 * nelt
- 1);
11545 dremap
.perm
[i
] = which
;
11551 /* Use the remapping array set up above to move the elements from their
11552 swizzled locations into their final destinations. */
11554 for (i
= 0; i
< nelt
; ++i
)
11556 unsigned e
= remap
[d
->perm
[i
]];
11557 gcc_assert (e
< nelt
);
11558 dfinal
.perm
[i
] = e
;
11561 dfinal
.op0
= gen_raw_REG (dfinal
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
11563 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
11564 dfinal
.op1
= dfinal
.op0
;
11565 dfinal
.one_operand_p
= true;
11566 dremap
.target
= dfinal
.op0
;
11568 /* Test if the final remap can be done with a single insn. For V4HImode
11569 this *will* succeed. For V8QImode or V2SImode it may not. */
11571 ok
= expand_vec_perm_1 (&dfinal
);
11572 seq
= get_insns ();
11579 ok
= expand_vec_perm_1 (&dremap
);
11586 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11587 constant permutation via two mux2 and a merge. */
11590 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d
*d
)
11592 unsigned char perm2
[4];
11595 rtx t0
, t1
, mask
, x
;
11598 if (d
->vmode
!= V4HImode
|| d
->one_operand_p
)
11603 for (i
= 0; i
< 4; ++i
)
11605 perm2
[i
] = d
->perm
[i
] & 3;
11606 rmask
[i
] = (d
->perm
[i
] & 4 ? const0_rtx
: constm1_rtx
);
11608 mask
= gen_rtx_CONST_VECTOR (V4HImode
, gen_rtvec_v (4, rmask
));
11609 mask
= force_reg (V4HImode
, mask
);
11611 t0
= gen_reg_rtx (V4HImode
);
11612 t1
= gen_reg_rtx (V4HImode
);
11614 ok
= expand_vselect (t0
, d
->op0
, perm2
, 4);
11616 ok
= expand_vselect (t1
, d
->op1
, perm2
, 4);
11619 x
= gen_rtx_AND (V4HImode
, mask
, t0
);
11620 emit_insn (gen_rtx_SET (t0
, x
));
11622 x
= gen_rtx_NOT (V4HImode
, mask
);
11623 x
= gen_rtx_AND (V4HImode
, x
, t1
);
11624 emit_insn (gen_rtx_SET (t1
, x
));
11626 x
= gen_rtx_IOR (V4HImode
, t0
, t1
);
11627 emit_insn (gen_rtx_SET (d
->target
, x
));
11632 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11633 With all of the interface bits taken care of, perform the expansion
11634 in D and return true on success. */
11637 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
11639 if (expand_vec_perm_1 (d
))
11641 if (expand_vec_perm_broadcast (d
))
11643 if (expand_vec_perm_interleave_2 (d
))
11645 if (expand_vec_perm_v4hi_5 (d
))
11651 ia64_expand_vec_perm_const (rtx operands
[4])
11653 struct expand_vec_perm_d d
;
11654 unsigned char perm
[MAX_VECT_LEN
];
11655 int i
, nelt
, which
;
11658 d
.target
= operands
[0];
11659 d
.op0
= operands
[1];
11660 d
.op1
= operands
[2];
11663 d
.vmode
= GET_MODE (d
.target
);
11664 gcc_assert (VECTOR_MODE_P (d
.vmode
));
11665 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
11666 d
.testing_p
= false;
11668 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
11669 gcc_assert (XVECLEN (sel
, 0) == nelt
);
11670 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
11672 for (i
= which
= 0; i
< nelt
; ++i
)
11674 rtx e
= XVECEXP (sel
, 0, i
);
11675 int ei
= INTVAL (e
) & (2 * nelt
- 1);
11677 which
|= (ei
< nelt
? 1 : 2);
11688 if (!rtx_equal_p (d
.op0
, d
.op1
))
11690 d
.one_operand_p
= false;
11694 /* The elements of PERM do not suggest that only the first operand
11695 is used, but both operands are identical. Allow easier matching
11696 of the permutation by folding the permutation into the single
11698 for (i
= 0; i
< nelt
; ++i
)
11699 if (d
.perm
[i
] >= nelt
)
11705 d
.one_operand_p
= true;
11709 for (i
= 0; i
< nelt
; ++i
)
11712 d
.one_operand_p
= true;
11716 if (ia64_expand_vec_perm_const_1 (&d
))
11719 /* If the mask says both arguments are needed, but they are the same,
11720 the above tried to expand with one_operand_p true. If that didn't
11721 work, retry with one_operand_p false, as that's what we used in _ok. */
11722 if (which
== 3 && d
.one_operand_p
)
11724 memcpy (d
.perm
, perm
, sizeof (perm
));
11725 d
.one_operand_p
= false;
11726 return ia64_expand_vec_perm_const_1 (&d
);
11732 /* Implement targetm.vectorize.vec_perm_const_ok. */
11735 ia64_vectorize_vec_perm_const_ok (machine_mode vmode
,
11736 const unsigned char *sel
)
11738 struct expand_vec_perm_d d
;
11739 unsigned int i
, nelt
, which
;
11743 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
11744 d
.testing_p
= true;
11746 /* Extract the values from the vector CST into the permutation
11748 memcpy (d
.perm
, sel
, nelt
);
11749 for (i
= which
= 0; i
< nelt
; ++i
)
11751 unsigned char e
= d
.perm
[i
];
11752 gcc_assert (e
< 2 * nelt
);
11753 which
|= (e
< nelt
? 1 : 2);
11756 /* For all elements from second vector, fold the elements to first. */
11758 for (i
= 0; i
< nelt
; ++i
)
11761 /* Check whether the mask can be applied to the vector type. */
11762 d
.one_operand_p
= (which
!= 3);
11764 /* Otherwise we have to go through the motions and see if we can
11765 figure out how to generate the requested permutation. */
11766 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
11767 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
11768 if (!d
.one_operand_p
)
11769 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
11772 ret
= ia64_expand_vec_perm_const_1 (&d
);
11779 ia64_expand_vec_setv2sf (rtx operands
[3])
11781 struct expand_vec_perm_d d
;
11782 unsigned int which
;
11785 d
.target
= operands
[0];
11786 d
.op0
= operands
[0];
11787 d
.op1
= gen_reg_rtx (V2SFmode
);
11788 d
.vmode
= V2SFmode
;
11790 d
.one_operand_p
= false;
11791 d
.testing_p
= false;
11793 which
= INTVAL (operands
[2]);
11794 gcc_assert (which
<= 1);
11795 d
.perm
[0] = 1 - which
;
11796 d
.perm
[1] = which
+ 2;
11798 emit_insn (gen_fpack (d
.op1
, operands
[1], CONST0_RTX (SFmode
)));
11800 ok
= ia64_expand_vec_perm_const_1 (&d
);
11805 ia64_expand_vec_perm_even_odd (rtx target
, rtx op0
, rtx op1
, int odd
)
11807 struct expand_vec_perm_d d
;
11808 machine_mode vmode
= GET_MODE (target
);
11809 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
11817 d
.one_operand_p
= false;
11818 d
.testing_p
= false;
11820 for (i
= 0; i
< nelt
; ++i
)
11821 d
.perm
[i
] = i
* 2 + odd
;
11823 ok
= ia64_expand_vec_perm_const_1 (&d
);
11827 #include "gt-ia64.h"