1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson <wilson@cygnus.com> and
5 David Mosberger <davidm@hpl.hp.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;
/* Register names for ia64_expand_prologue.  These are the stacked
   general registers r32..r127, indexed from zero.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;
111 /* Which cpu are we scheduling for. */
112 enum processor_type ia64_tune
;
114 /* String used with the -tune= option. */
115 const char *ia64_tune_string
;
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
130 /* The following variable is used by the DFA insn scheduler. The value is
131 TRUE if we do insn bundling instead of insn scheduling. */
134 /* Structure to be filled in by ia64_compute_frame_size with register
135 save masks and offsets for the current function. */
137 struct ia64_frame_info
139 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
140 the caller's scratch area. */
141 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
142 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
143 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
144 HARD_REG_SET mask
; /* mask of saved registers. */
145 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
146 registers or long-term scratches. */
147 int n_spilled
; /* number of spilled registers. */
148 int reg_fp
; /* register for fp. */
149 int reg_save_b0
; /* save register for b0. */
150 int reg_save_pr
; /* save register for prs. */
151 int reg_save_ar_pfs
; /* save register for ar.pfs. */
152 int reg_save_ar_unat
; /* save register for ar.unat. */
153 int reg_save_ar_lc
; /* save register for ar.lc. */
154 int reg_save_gp
; /* save register for gp. */
155 int n_input_regs
; /* number of input registers used. */
156 int n_local_regs
; /* number of local registers used. */
157 int n_output_regs
; /* number of output registers used. */
158 int n_rotate_regs
; /* number of rotating registers used. */
160 char need_regstk
; /* true if a .regstk directive needed. */
161 char initialized
; /* true if the data is finalized. */
164 /* Current frame information calculated by ia64_compute_frame_size. */
165 static struct ia64_frame_info current_frame_info
;
167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
168 static void ia64_dependencies_evaluation_hook (rtx
, rtx
);
169 static void ia64_init_dfa_pre_cycle_insn (void);
170 static rtx
ia64_dfa_pre_cycle_insn (void);
171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx
);
172 static int ia64_dfa_new_cycle (FILE *, int, rtx
, int, int, int *);
173 static rtx
gen_tls_get_addr (void);
174 static rtx
gen_thread_pointer (void);
175 static rtx
ia64_expand_tls_address (enum tls_model
, rtx
, rtx
);
176 static int find_gr_spill (int);
177 static int next_scratch_gr_reg (void);
178 static void mark_reg_gr_used_mask (rtx
, void *);
179 static void ia64_compute_frame_size (HOST_WIDE_INT
);
180 static void setup_spill_pointers (int, rtx
, HOST_WIDE_INT
);
181 static void finish_spill_pointers (void);
182 static rtx
spill_restore_mem (rtx
, HOST_WIDE_INT
);
183 static void do_spill (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
);
184 static void do_restore (rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
);
185 static rtx
gen_movdi_x (rtx
, rtx
, rtx
);
186 static rtx
gen_fr_spill_x (rtx
, rtx
, rtx
);
187 static rtx
gen_fr_restore_x (rtx
, rtx
, rtx
);
189 static enum machine_mode
hfa_element_mode (tree
, int);
190 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
192 static bool ia64_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
194 static bool ia64_function_ok_for_sibcall (tree
, tree
);
195 static bool ia64_return_in_memory (tree
, tree
);
196 static bool ia64_rtx_costs (rtx
, int, int, int *);
197 static void fix_range (const char *);
198 static struct machine_function
* ia64_init_machine_status (void);
199 static void emit_insn_group_barriers (FILE *);
200 static void emit_all_insn_group_barriers (FILE *);
201 static void final_emit_insn_group_barriers (FILE *);
202 static void emit_predicate_relation_info (void);
203 static void ia64_reorg (void);
204 static bool ia64_in_small_data_p (tree
);
205 static void process_epilogue (void);
206 static int process_set (FILE *, rtx
);
208 static rtx
ia64_expand_fetch_and_op (optab
, enum machine_mode
, tree
, rtx
);
209 static rtx
ia64_expand_op_and_fetch (optab
, enum machine_mode
, tree
, rtx
);
210 static rtx
ia64_expand_compare_and_swap (enum machine_mode
, enum machine_mode
,
212 static rtx
ia64_expand_lock_test_and_set (enum machine_mode
, tree
, rtx
);
213 static rtx
ia64_expand_lock_release (enum machine_mode
, tree
, rtx
);
214 static bool ia64_assemble_integer (rtx
, unsigned int, int);
215 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT
);
216 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT
);
217 static void ia64_output_function_end_prologue (FILE *);
219 static int ia64_issue_rate (void);
220 static int ia64_adjust_cost (rtx
, rtx
, rtx
, int);
221 static void ia64_sched_init (FILE *, int, int);
222 static void ia64_sched_finish (FILE *, int);
223 static int ia64_dfa_sched_reorder (FILE *, int, rtx
*, int *, int, int);
224 static int ia64_sched_reorder (FILE *, int, rtx
*, int *, int);
225 static int ia64_sched_reorder2 (FILE *, int, rtx
*, int *, int);
226 static int ia64_variable_issue (FILE *, int, rtx
, int);
228 static struct bundle_state
*get_free_bundle_state (void);
229 static void free_bundle_state (struct bundle_state
*);
230 static void initiate_bundle_states (void);
231 static void finish_bundle_states (void);
232 static unsigned bundle_state_hash (const void *);
233 static int bundle_state_eq_p (const void *, const void *);
234 static int insert_bundle_state (struct bundle_state
*);
235 static void initiate_bundle_state_table (void);
236 static void finish_bundle_state_table (void);
237 static int try_issue_nops (struct bundle_state
*, int);
238 static int try_issue_insn (struct bundle_state
*, rtx
);
239 static void issue_nops_and_insn (struct bundle_state
*, int, rtx
, int, int);
240 static int get_max_pos (state_t
);
241 static int get_template (state_t
, int);
243 static rtx
get_next_important_insn (rtx
, rtx
);
244 static void bundling (FILE *, int, rtx
, rtx
);
246 static void ia64_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
247 HOST_WIDE_INT
, tree
);
248 static void ia64_file_start (void);
250 static void ia64_select_rtx_section (enum machine_mode
, rtx
,
251 unsigned HOST_WIDE_INT
);
252 static void ia64_rwreloc_select_section (tree
, int, unsigned HOST_WIDE_INT
)
254 static void ia64_rwreloc_unique_section (tree
, int)
256 static void ia64_rwreloc_select_rtx_section (enum machine_mode
, rtx
,
257 unsigned HOST_WIDE_INT
)
259 static unsigned int ia64_rwreloc_section_type_flags (tree
, const char *, int)
262 static void ia64_hpux_add_extern_decl (tree decl
)
264 static void ia64_hpux_file_end (void)
266 static void ia64_init_libfuncs (void)
268 static void ia64_hpux_init_libfuncs (void)
270 static void ia64_sysv4_init_libfuncs (void)
272 static void ia64_vms_init_libfuncs (void)
275 static tree
ia64_handle_model_attribute (tree
*, tree
, tree
, int, bool *);
276 static void ia64_encode_section_info (tree
, rtx
, int);
277 static rtx
ia64_struct_value_rtx (tree
, int);
278 static tree
ia64_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
281 /* Table of valid machine attributes. */
282 static const struct attribute_spec ia64_attribute_table
[] =
284 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
285 { "syscall_linkage", 0, 0, false, true, true, NULL
},
286 { "model", 1, 1, true, false, false, ia64_handle_model_attribute
},
287 { NULL
, 0, 0, false, false, false, NULL
}
290 /* Initialize the GCC target structure. */
291 #undef TARGET_ATTRIBUTE_TABLE
292 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS ia64_init_builtins
297 #undef TARGET_EXPAND_BUILTIN
298 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
300 #undef TARGET_ASM_BYTE_OP
301 #define TARGET_ASM_BYTE_OP "\tdata1\t"
302 #undef TARGET_ASM_ALIGNED_HI_OP
303 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
304 #undef TARGET_ASM_ALIGNED_SI_OP
305 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
306 #undef TARGET_ASM_ALIGNED_DI_OP
307 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
308 #undef TARGET_ASM_UNALIGNED_HI_OP
309 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
310 #undef TARGET_ASM_UNALIGNED_SI_OP
311 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
312 #undef TARGET_ASM_UNALIGNED_DI_OP
313 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
314 #undef TARGET_ASM_INTEGER
315 #define TARGET_ASM_INTEGER ia64_assemble_integer
317 #undef TARGET_ASM_FUNCTION_PROLOGUE
318 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
319 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
320 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
321 #undef TARGET_ASM_FUNCTION_EPILOGUE
322 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
324 #undef TARGET_IN_SMALL_DATA_P
325 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
327 #undef TARGET_SCHED_ADJUST_COST
328 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
329 #undef TARGET_SCHED_ISSUE_RATE
330 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
331 #undef TARGET_SCHED_VARIABLE_ISSUE
332 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
333 #undef TARGET_SCHED_INIT
334 #define TARGET_SCHED_INIT ia64_sched_init
335 #undef TARGET_SCHED_FINISH
336 #define TARGET_SCHED_FINISH ia64_sched_finish
337 #undef TARGET_SCHED_REORDER
338 #define TARGET_SCHED_REORDER ia64_sched_reorder
339 #undef TARGET_SCHED_REORDER2
340 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
342 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
343 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
345 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
346 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
348 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
349 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
350 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
351 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
353 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
354 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
355 ia64_first_cycle_multipass_dfa_lookahead_guard
357 #undef TARGET_SCHED_DFA_NEW_CYCLE
358 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
360 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
361 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
365 #undef TARGET_ASM_OUTPUT_MI_THUNK
366 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
367 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
368 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
370 #undef TARGET_ASM_FILE_START
371 #define TARGET_ASM_FILE_START ia64_file_start
373 #undef TARGET_RTX_COSTS
374 #define TARGET_RTX_COSTS ia64_rtx_costs
375 #undef TARGET_ADDRESS_COST
376 #define TARGET_ADDRESS_COST hook_int_rtx_0
378 #undef TARGET_MACHINE_DEPENDENT_REORG
379 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
381 #undef TARGET_ENCODE_SECTION_INFO
382 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
384 /* ??? ABI doesn't allow us to define this. */
386 #undef TARGET_PROMOTE_FUNCTION_ARGS
387 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
390 /* ??? ABI doesn't allow us to define this. */
392 #undef TARGET_PROMOTE_FUNCTION_RETURN
393 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
396 /* ??? Investigate. */
398 #undef TARGET_PROMOTE_PROTOTYPES
399 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
402 #undef TARGET_STRUCT_VALUE_RTX
403 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
404 #undef TARGET_RETURN_IN_MEMORY
405 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
406 #undef TARGET_SETUP_INCOMING_VARARGS
407 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
408 #undef TARGET_STRICT_ARGUMENT_NAMING
409 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
410 #undef TARGET_MUST_PASS_IN_STACK
411 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
413 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
414 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
416 #undef TARGET_UNWIND_EMIT
417 #define TARGET_UNWIND_EMIT process_for_unwind_directive
419 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The address areas a decl can be placed in, as chosen by the "model"
   attribute.  NOTE(review): the enclosing "typedef enum { ... }
   ia64_addr_area;" wrapper was lost in extraction and reconstructed
   from the type's later uses -- confirm against upstream.  */
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
428 static GTY(()) tree small_ident1
;
429 static GTY(()) tree small_ident2
;
434 if (small_ident1
== 0)
436 small_ident1
= get_identifier ("small");
437 small_ident2
= get_identifier ("__small__");
441 /* Retrieve the address area that has been chosen for the given decl. */
443 static ia64_addr_area
444 ia64_get_addr_area (tree decl
)
448 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
454 id
= TREE_VALUE (TREE_VALUE (model_attr
));
455 if (id
== small_ident1
|| id
== small_ident2
)
456 return ADDR_AREA_SMALL
;
458 return ADDR_AREA_NORMAL
;
462 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
, int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
464 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
466 tree arg
, decl
= *node
;
469 arg
= TREE_VALUE (args
);
470 if (arg
== small_ident1
|| arg
== small_ident2
)
472 addr_area
= ADDR_AREA_SMALL
;
476 warning ("invalid argument of `%s' attribute",
477 IDENTIFIER_POINTER (name
));
478 *no_add_attrs
= true;
481 switch (TREE_CODE (decl
))
484 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
486 && !TREE_STATIC (decl
))
488 error ("%Jan address area attribute cannot be specified for "
489 "local variables", decl
, decl
);
490 *no_add_attrs
= true;
492 area
= ia64_get_addr_area (decl
);
493 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
495 error ("%Jaddress area of '%s' conflicts with previous "
496 "declaration", decl
, decl
);
497 *no_add_attrs
= true;
502 error ("%Jaddress area attribute cannot be specified for functions",
504 *no_add_attrs
= true;
508 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
509 *no_add_attrs
= true;
517 ia64_encode_addr_area (tree decl
, rtx symbol
)
521 flags
= SYMBOL_REF_FLAGS (symbol
);
522 switch (ia64_get_addr_area (decl
))
524 case ADDR_AREA_NORMAL
: break;
525 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
528 SYMBOL_REF_FLAGS (symbol
) = flags
;
532 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
534 default_encode_section_info (decl
, rtl
, first
);
536 /* Careful not to prod global register variables. */
537 if (TREE_CODE (decl
) == VAR_DECL
538 && GET_CODE (DECL_RTL (decl
)) == MEM
539 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
540 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
541 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
544 /* Return 1 if the operands of a move are ok. */
547 ia64_move_ok (rtx dst
, rtx src
)
549 /* If we're under init_recog_no_volatile, we'll not be able to use
550 memory_operand. So check the code directly and don't worry about
551 the validity of the underlying address, which should have been
552 checked elsewhere anyway. */
553 if (GET_CODE (dst
) != MEM
)
555 if (GET_CODE (src
) == MEM
)
557 if (register_operand (src
, VOIDmode
))
560 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
561 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
562 return src
== const0_rtx
;
564 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
568 addp4_optimize_ok (rtx op1
, rtx op2
)
570 return (basereg_operand (op1
, GET_MODE(op1
)) !=
571 basereg_operand (op2
, GET_MODE(op2
)));
574 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
575 Return the length of the field, or <= 0 on failure. */
578 ia64_depz_field_mask (rtx rop
, rtx rshift
)
580 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
581 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
583 /* Get rid of the zero bits we're shifting in. */
586 /* We must now have a solid block of 1's at bit 0. */
587 return exact_log2 (op
+ 1);
590 /* Expand a symbolic constant load. */
593 ia64_expand_load_address (rtx dest
, rtx src
)
595 if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (src
))
597 if (GET_CODE (dest
) != REG
)
600 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
601 having to pointer-extend the value afterward. Other forms of address
602 computation below are also more natural to compute as 64-bit quantities.
603 If we've been given an SImode destination register, change it. */
604 if (GET_MODE (dest
) != Pmode
)
605 dest
= gen_rtx_REG (Pmode
, REGNO (dest
));
607 if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_SMALL_ADDR_P (src
))
609 emit_insn (gen_rtx_SET (VOIDmode
, dest
, src
));
612 else if (TARGET_AUTO_PIC
)
614 emit_insn (gen_load_gprel64 (dest
, src
));
617 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
619 emit_insn (gen_load_fptr (dest
, src
));
622 else if (sdata_symbolic_operand (src
, VOIDmode
))
624 emit_insn (gen_load_gprel (dest
, src
));
628 if (GET_CODE (src
) == CONST
629 && GET_CODE (XEXP (src
, 0)) == PLUS
630 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
631 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x3fff) != 0)
633 rtx sym
= XEXP (XEXP (src
, 0), 0);
634 HOST_WIDE_INT ofs
, hi
, lo
;
636 /* Split the offset into a sign extended 14-bit low part
637 and a complementary high part. */
638 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
639 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
642 ia64_expand_load_address (dest
, plus_constant (sym
, hi
));
643 emit_insn (gen_adddi3 (dest
, dest
, GEN_INT (lo
)));
649 tmp
= gen_rtx_HIGH (Pmode
, src
);
650 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
651 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
653 tmp
= gen_rtx_LO_SUM (GET_MODE (dest
), dest
, src
);
654 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
658 static GTY(()) rtx gen_tls_tga
;
660 gen_tls_get_addr (void)
663 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
667 static GTY(()) rtx thread_pointer_rtx
;
669 gen_thread_pointer (void)
671 if (!thread_pointer_rtx
)
672 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
673 return thread_pointer_rtx
;
677 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
)
679 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
684 case TLS_MODEL_GLOBAL_DYNAMIC
:
687 tga_op1
= gen_reg_rtx (Pmode
);
688 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
689 tga_op1
= gen_const_mem (Pmode
, tga_op1
);
691 tga_op2
= gen_reg_rtx (Pmode
);
692 emit_insn (gen_load_ltoff_dtprel (tga_op2
, op1
));
693 tga_op2
= gen_const_mem (Pmode
, tga_op2
);
695 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
696 LCT_CONST
, Pmode
, 2, tga_op1
,
697 Pmode
, tga_op2
, Pmode
);
699 insns
= get_insns ();
702 if (GET_MODE (op0
) != Pmode
)
704 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
707 case TLS_MODEL_LOCAL_DYNAMIC
:
708 /* ??? This isn't the completely proper way to do local-dynamic
709 If the call to __tls_get_addr is used only by a single symbol,
710 then we should (somehow) move the dtprel to the second arg
711 to avoid the extra add. */
714 tga_op1
= gen_reg_rtx (Pmode
);
715 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
716 tga_op1
= gen_const_mem (Pmode
, tga_op1
);
718 tga_op2
= const0_rtx
;
720 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
721 LCT_CONST
, Pmode
, 2, tga_op1
,
722 Pmode
, tga_op2
, Pmode
);
724 insns
= get_insns ();
727 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
729 tmp
= gen_reg_rtx (Pmode
);
730 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
732 if (!register_operand (op0
, Pmode
))
733 op0
= gen_reg_rtx (Pmode
);
736 emit_insn (gen_load_dtprel (op0
, op1
));
737 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
740 emit_insn (gen_add_dtprel (op0
, tmp
, op1
));
743 case TLS_MODEL_INITIAL_EXEC
:
744 tmp
= gen_reg_rtx (Pmode
);
745 emit_insn (gen_load_ltoff_tprel (tmp
, op1
));
746 tmp
= gen_const_mem (Pmode
, tmp
);
747 tmp
= force_reg (Pmode
, tmp
);
749 if (!register_operand (op0
, Pmode
))
750 op0
= gen_reg_rtx (Pmode
);
751 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
754 case TLS_MODEL_LOCAL_EXEC
:
755 if (!register_operand (op0
, Pmode
))
756 op0
= gen_reg_rtx (Pmode
);
759 emit_insn (gen_load_tprel (op0
, op1
));
760 emit_insn (gen_adddi3 (op0
, gen_thread_pointer (), op0
));
763 emit_insn (gen_add_tprel (op0
, gen_thread_pointer (), op1
));
772 if (GET_MODE (orig_op0
) == Pmode
)
774 return gen_lowpart (GET_MODE (orig_op0
), op0
);
778 ia64_expand_move (rtx op0
, rtx op1
)
780 enum machine_mode mode
= GET_MODE (op0
);
782 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
783 op1
= force_reg (mode
, op1
);
785 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
787 enum tls_model tls_kind
;
788 if (GET_CODE (op1
) == SYMBOL_REF
789 && (tls_kind
= SYMBOL_REF_TLS_MODEL (op1
)))
790 return ia64_expand_tls_address (tls_kind
, op0
, op1
);
792 if (!TARGET_NO_PIC
&& reload_completed
)
794 ia64_expand_load_address (op0
, op1
);
802 /* Split a move from OP1 to OP0 conditional on COND. */
805 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
807 rtx insn
, first
= get_last_insn ();
809 emit_move_insn (op0
, op1
);
811 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
813 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
817 /* Split a post-reload TImode or TFmode reference into two DImode
818 components. This is made extra difficult by the fact that we do
819 not get any scratch registers to work with, because reload cannot
820 be prevented from giving us a scratch that overlaps the register
821 pair involved. So instead, when addressing memory, we tweak the
822 pointer register up and back down with POST_INCs. Or up and not
823 back down when we can get away with it.
825 REVERSED is true when the loads must be done in reversed order
826 (high word first) for correctness. DEAD is true when the pointer
827 dies with the second insn we generate and therefore the second
828 address must not carry a postmodify.
830 May return an insn which is to be emitted after the moves. */
833 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
837 switch (GET_CODE (in
))
840 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
841 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
846 /* Cannot occur reversed. */
847 if (reversed
) abort ();
849 if (GET_MODE (in
) != TFmode
)
850 split_double (in
, &out
[0], &out
[1]);
852 /* split_double does not understand how to split a TFmode
853 quantity into a pair of DImode constants. */
856 unsigned HOST_WIDE_INT p
[2];
857 long l
[4]; /* TFmode is 128 bits */
859 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
860 real_to_target (l
, &r
, TFmode
);
862 if (FLOAT_WORDS_BIG_ENDIAN
)
864 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
865 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
869 p
[0] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
870 p
[1] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
872 out
[0] = GEN_INT (p
[0]);
873 out
[1] = GEN_INT (p
[1]);
879 rtx base
= XEXP (in
, 0);
882 switch (GET_CODE (base
))
887 out
[0] = adjust_automodify_address
888 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
889 out
[1] = adjust_automodify_address
890 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
894 /* Reversal requires a pre-increment, which can only
895 be done as a separate insn. */
896 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
897 out
[0] = adjust_automodify_address
898 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
899 out
[1] = adjust_address (in
, DImode
, 0);
904 if (reversed
|| dead
) abort ();
905 /* Just do the increment in two steps. */
906 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
907 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
911 if (reversed
|| dead
) abort ();
912 /* Add 8, subtract 24. */
913 base
= XEXP (base
, 0);
914 out
[0] = adjust_automodify_address
915 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
916 out
[1] = adjust_automodify_address
918 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
923 if (reversed
|| dead
) abort ();
924 /* Extract and adjust the modification. This case is
925 trickier than the others, because we might have an
926 index register, or we might have a combined offset that
927 doesn't fit a signed 9-bit displacement field. We can
928 assume the incoming expression is already legitimate. */
929 offset
= XEXP (base
, 1);
930 base
= XEXP (base
, 0);
932 out
[0] = adjust_automodify_address
933 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
935 if (GET_CODE (XEXP (offset
, 1)) == REG
)
937 /* Can't adjust the postmodify to match. Emit the
938 original, then a separate addition insn. */
939 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
940 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
942 else if (GET_CODE (XEXP (offset
, 1)) != CONST_INT
)
944 else if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
946 /* Again the postmodify cannot be made to match, but
947 in this case it's more efficient to get rid of the
948 postmodify entirely and fix up with an add insn. */
949 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
950 fixup
= gen_adddi3 (base
, base
,
951 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
955 /* Combined offset still fits in the displacement field.
956 (We cannot overflow it at the high end.) */
957 out
[1] = adjust_automodify_address
959 gen_rtx_POST_MODIFY (Pmode
, base
,
960 gen_rtx_PLUS (Pmode
, base
,
961 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
979 /* Split a TImode or TFmode move instruction after reload.
980 This is used by *movtf_internal and *movti_internal. */
982 ia64_split_tmode_move (rtx operands
[])
984 rtx in
[2], out
[2], insn
;
987 bool reversed
= false;
989 /* It is possible for reload to decide to overwrite a pointer with
990 the value it points to. In that case we have to do the loads in
991 the appropriate order so that the pointer is not destroyed too
992 early. Also we must not generate a postmodify for that second
993 load, or rws_access_regno will abort. */
994 if (GET_CODE (operands
[1]) == MEM
995 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
997 rtx base
= XEXP (operands
[1], 0);
998 while (GET_CODE (base
) != REG
)
999 base
= XEXP (base
, 0);
1001 if (REGNO (base
) == REGNO (operands
[0]))
1005 /* Another reason to do the moves in reversed order is if the first
1006 element of the target register pair is also the second element of
1007 the source register pair. */
1008 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1009 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
1012 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1013 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
1015 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1016 if (GET_CODE (EXP) == MEM \
1017 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1018 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1019 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1020 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1021 XEXP (XEXP (EXP, 0), 0), \
1024 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[0], in
[0]));
1025 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1026 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1028 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[1], in
[1]));
1029 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1030 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1033 emit_insn (fixup
[0]);
1035 emit_insn (fixup
[1]);
1037 #undef MAYBE_ADD_REG_INC_NOTE
1040 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1041 through memory plus an extra GR scratch register. Except that you can
1042 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1043 SECONDARY_RELOAD_CLASS, but not both.
1045 We got into problems in the first place by allowing a construct like
1046 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1047 This solution attempts to prevent this situation from occurring. When
1048 we see something like the above, we spill the inner register to memory. */
1051 spill_xfmode_operand (rtx in
, int force
)
1053 if (GET_CODE (in
) == SUBREG
1054 && GET_MODE (SUBREG_REG (in
)) == TImode
1055 && GET_CODE (SUBREG_REG (in
)) == REG
)
1057 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1058 emit_move_insn (memt
, SUBREG_REG (in
));
1059 return adjust_address (memt
, XFmode
, 0);
1061 else if (force
&& GET_CODE (in
) == REG
)
1063 rtx memx
= assign_stack_temp (XFmode
, 16, 0);
1064 emit_move_insn (memx
, in
);
1071 /* Emit comparison instruction if necessary, returning the expression
1072 that holds the compare result in the proper mode. */
1074 static GTY(()) rtx cmptf_libfunc
;
1077 ia64_expand_compare (enum rtx_code code
, enum machine_mode mode
)
1079 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1082 /* If we have a BImode input, then we already have a compare result, and
1083 do not need to emit another comparison. */
1084 if (GET_MODE (op0
) == BImode
)
1086 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1091 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1092 magic number as its third argument, that indicates what to do.
1093 The return value is an integer to be compared against zero. */
1094 else if (GET_MODE (op0
) == TFmode
)
1097 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1103 enum rtx_code ncode
;
1105 if (!cmptf_libfunc
|| GET_MODE (op1
) != TFmode
)
1109 /* 1 = equal, 0 = not equal. Equality operators do
1110 not raise FP_INVALID when given an SNaN operand. */
1111 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1112 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1113 /* isunordered() from C99. */
1114 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1115 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1116 /* Relational operators raise FP_INVALID when given
1118 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1119 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1120 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1121 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1122 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1123 Expanders for buneq etc. weuld have to be added to ia64.md
1124 for this to be useful. */
1130 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1131 op0
, TFmode
, op1
, TFmode
,
1132 GEN_INT (magic
), DImode
);
1133 cmp
= gen_reg_rtx (BImode
);
1134 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1135 gen_rtx_fmt_ee (ncode
, BImode
,
1138 insns
= get_insns ();
1141 emit_libcall_block (insns
, cmp
, cmp
,
1142 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
));
1147 cmp
= gen_reg_rtx (BImode
);
1148 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1149 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1153 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1156 /* Emit the appropriate sequence for a call. */
1159 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
1164 addr
= XEXP (addr
, 0);
1165 addr
= convert_memory_address (DImode
, addr
);
1166 b0
= gen_rtx_REG (DImode
, R_BR (0));
1168 /* ??? Should do this for functions known to bind local too. */
1169 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1172 insn
= gen_sibcall_nogp (addr
);
1174 insn
= gen_call_nogp (addr
, b0
);
1176 insn
= gen_call_value_nogp (retval
, addr
, b0
);
1177 insn
= emit_call_insn (insn
);
1182 insn
= gen_sibcall_gp (addr
);
1184 insn
= gen_call_gp (addr
, b0
);
1186 insn
= gen_call_value_gp (retval
, addr
, b0
);
1187 insn
= emit_call_insn (insn
);
1189 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
1193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
1197 ia64_reload_gp (void)
1201 if (current_frame_info
.reg_save_gp
)
1202 tmp
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_gp
);
1205 HOST_WIDE_INT offset
;
1207 offset
= (current_frame_info
.spill_cfa_off
1208 + current_frame_info
.spill_size
);
1209 if (frame_pointer_needed
)
1211 tmp
= hard_frame_pointer_rtx
;
1216 tmp
= stack_pointer_rtx
;
1217 offset
= current_frame_info
.total_size
- offset
;
1220 if (CONST_OK_FOR_I (offset
))
1221 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1222 tmp
, GEN_INT (offset
)));
1225 emit_move_insn (pic_offset_table_rtx
, GEN_INT (offset
));
1226 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1227 pic_offset_table_rtx
, tmp
));
1230 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
1233 emit_move_insn (pic_offset_table_rtx
, tmp
);
1237 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
1238 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
1241 bool is_desc
= false;
1243 /* If we find we're calling through a register, then we're actually
1244 calling through a descriptor, so load up the values. */
1245 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
1250 /* ??? We are currently constrained to *not* use peep2, because
1251 we can legitimately change the global lifetime of the GP
1252 (in the form of killing where previously live). This is
1253 because a call through a descriptor doesn't use the previous
1254 value of the GP, while a direct call does, and we do not
1255 commit to either form until the split here.
1257 That said, this means that we lack precise life info for
1258 whether ADDR is dead after this call. This is not terribly
1259 important, since we can fix things up essentially for free
1260 with the POST_DEC below, but it's nice to not use it when we
1261 can immediately tell it's not necessary. */
1262 addr_dead_p
= ((noreturn_p
|| sibcall_p
1263 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
1265 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
1267 /* Load the code address into scratch_b. */
1268 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
1269 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1270 emit_move_insn (scratch_r
, tmp
);
1271 emit_move_insn (scratch_b
, scratch_r
);
1273 /* Load the GP address. If ADDR is not dead here, then we must
1274 revert the change made above via the POST_INCREMENT. */
1276 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
1279 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1280 emit_move_insn (pic_offset_table_rtx
, tmp
);
1287 insn
= gen_sibcall_nogp (addr
);
1289 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
1291 insn
= gen_call_nogp (addr
, retaddr
);
1292 emit_call_insn (insn
);
1294 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
1298 /* Begin the assembly file. */
1301 ia64_file_start (void)
1303 default_file_start ();
1304 emit_safe_across_calls ();
1308 emit_safe_across_calls (void)
1310 unsigned int rs
, re
;
1317 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1321 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1325 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
1329 fputc (',', asm_out_file
);
1331 fprintf (asm_out_file
, "p%u", rs
);
1333 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
1337 fputc ('\n', asm_out_file
);
1340 /* Helper function for ia64_compute_frame_size: find an appropriate general
1341 register to spill some special register to. SPECIAL_SPILL_MASK contains
1342 bits in GR0 to GR31 that have already been allocated by this routine.
1343 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1346 find_gr_spill (int try_locals
)
1350 /* If this is a leaf function, first try an otherwise unused
1351 call-clobbered register. */
1352 if (current_function_is_leaf
)
1354 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1355 if (! regs_ever_live
[regno
]
1356 && call_used_regs
[regno
]
1357 && ! fixed_regs
[regno
]
1358 && ! global_regs
[regno
]
1359 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1361 current_frame_info
.gr_used_mask
|= 1 << regno
;
1368 regno
= current_frame_info
.n_local_regs
;
1369 /* If there is a frame pointer, then we can't use loc79, because
1370 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1371 reg_name switching code in ia64_expand_prologue. */
1372 if (regno
< (80 - frame_pointer_needed
))
1374 current_frame_info
.n_local_regs
= regno
+ 1;
1375 return LOC_REG (0) + regno
;
1379 /* Failed to find a general register to spill to. Must use stack. */
1383 /* In order to make for nice schedules, we try to allocate every temporary
1384 to a different register. We must of course stay away from call-saved,
1385 fixed, and global registers. We must also stay away from registers
1386 allocated in current_frame_info.gr_used_mask, since those include regs
1387 used all through the prologue.
1389 Any register allocated here must be used immediately. The idea is to
1390 aid scheduling, not to solve data flow problems. */
1392 static int last_scratch_gr_reg
;
1395 next_scratch_gr_reg (void)
1399 for (i
= 0; i
< 32; ++i
)
1401 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1402 if (call_used_regs
[regno
]
1403 && ! fixed_regs
[regno
]
1404 && ! global_regs
[regno
]
1405 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1407 last_scratch_gr_reg
= regno
;
1412 /* There must be _something_ available. */
1416 /* Helper function for ia64_compute_frame_size, called through
1417 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1420 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
1422 unsigned int regno
= REGNO (reg
);
1425 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1426 for (i
= 0; i
< n
; ++i
)
1427 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1431 /* Returns the number of bytes offset between the frame pointer and the stack
1432 pointer for the current function. SIZE is the number of bytes of space
1433 needed for local variables. */
1436 ia64_compute_frame_size (HOST_WIDE_INT size
)
1438 HOST_WIDE_INT total_size
;
1439 HOST_WIDE_INT spill_size
= 0;
1440 HOST_WIDE_INT extra_spill_size
= 0;
1441 HOST_WIDE_INT pretend_args_size
;
1444 int spilled_gr_p
= 0;
1445 int spilled_fr_p
= 0;
1449 if (current_frame_info
.initialized
)
1452 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1453 CLEAR_HARD_REG_SET (mask
);
1455 /* Don't allocate scratches to the return register. */
1456 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1458 /* Don't allocate scratches to the EH scratch registers. */
1459 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1460 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1461 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1462 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1464 /* Find the size of the register stack frame. We have only 80 local
1465 registers, because we reserve 8 for the inputs and 8 for the
1468 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1469 since we'll be adjusting that down later. */
1470 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1471 for (; regno
>= LOC_REG (0); regno
--)
1472 if (regs_ever_live
[regno
])
1474 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1476 /* For functions marked with the syscall_linkage attribute, we must mark
1477 all eight input registers as in use, so that locals aren't visible to
1480 if (cfun
->machine
->n_varargs
> 0
1481 || lookup_attribute ("syscall_linkage",
1482 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1483 current_frame_info
.n_input_regs
= 8;
1486 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1487 if (regs_ever_live
[regno
])
1489 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1492 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1493 if (regs_ever_live
[regno
])
1495 i
= regno
- OUT_REG (0) + 1;
1497 /* When -p profiling, we need one output register for the mcount argument.
1498 Likewise for -a profiling for the bb_init_func argument. For -ax
1499 profiling, we need two output registers for the two bb_init_trace_func
1501 if (current_function_profile
)
1503 current_frame_info
.n_output_regs
= i
;
1505 /* ??? No rotating register support yet. */
1506 current_frame_info
.n_rotate_regs
= 0;
1508 /* Discover which registers need spilling, and how much room that
1509 will take. Begin with floating point and general registers,
1510 which will always wind up on the stack. */
1512 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1513 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1515 SET_HARD_REG_BIT (mask
, regno
);
1521 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1522 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1524 SET_HARD_REG_BIT (mask
, regno
);
1530 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1531 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1533 SET_HARD_REG_BIT (mask
, regno
);
1538 /* Now come all special registers that might get saved in other
1539 general registers. */
1541 if (frame_pointer_needed
)
1543 current_frame_info
.reg_fp
= find_gr_spill (1);
1544 /* If we did not get a register, then we take LOC79. This is guaranteed
1545 to be free, even if regs_ever_live is already set, because this is
1546 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1547 as we don't count loc79 above. */
1548 if (current_frame_info
.reg_fp
== 0)
1550 current_frame_info
.reg_fp
= LOC_REG (79);
1551 current_frame_info
.n_local_regs
++;
1555 if (! current_function_is_leaf
)
1557 /* Emit a save of BR0 if we call other functions. Do this even
1558 if this function doesn't return, as EH depends on this to be
1559 able to unwind the stack. */
1560 SET_HARD_REG_BIT (mask
, BR_REG (0));
1562 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1563 if (current_frame_info
.reg_save_b0
== 0)
1569 /* Similarly for ar.pfs. */
1570 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1571 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1572 if (current_frame_info
.reg_save_ar_pfs
== 0)
1574 extra_spill_size
+= 8;
1578 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1579 registers are clobbered, so we fall back to the stack. */
1580 current_frame_info
.reg_save_gp
1581 = (current_function_calls_setjmp
? 0 : find_gr_spill (1));
1582 if (current_frame_info
.reg_save_gp
== 0)
1584 SET_HARD_REG_BIT (mask
, GR_REG (1));
1591 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1593 SET_HARD_REG_BIT (mask
, BR_REG (0));
1598 if (regs_ever_live
[AR_PFS_REGNUM
])
1600 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1601 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1602 if (current_frame_info
.reg_save_ar_pfs
== 0)
1604 extra_spill_size
+= 8;
1610 /* Unwind descriptor hackery: things are most efficient if we allocate
1611 consecutive GR save registers for RP, PFS, FP in that order. However,
1612 it is absolutely critical that FP get the only hard register that's
1613 guaranteed to be free, so we allocated it first. If all three did
1614 happen to be allocated hard regs, and are consecutive, rearrange them
1615 into the preferred order now. */
1616 if (current_frame_info
.reg_fp
!= 0
1617 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1618 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1620 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1621 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1622 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1625 /* See if we need to store the predicate register block. */
1626 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1627 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1629 if (regno
<= PR_REG (63))
1631 SET_HARD_REG_BIT (mask
, PR_REG (0));
1632 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1633 if (current_frame_info
.reg_save_pr
== 0)
1635 extra_spill_size
+= 8;
1639 /* ??? Mark them all as used so that register renaming and such
1640 are free to use them. */
1641 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1642 regs_ever_live
[regno
] = 1;
1645 /* If we're forced to use st8.spill, we're forced to save and restore
1646 ar.unat as well. The check for existing liveness allows inline asm
1647 to touch ar.unat. */
1648 if (spilled_gr_p
|| cfun
->machine
->n_varargs
1649 || regs_ever_live
[AR_UNAT_REGNUM
])
1651 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1652 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1653 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1654 if (current_frame_info
.reg_save_ar_unat
== 0)
1656 extra_spill_size
+= 8;
1661 if (regs_ever_live
[AR_LC_REGNUM
])
1663 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1664 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1665 if (current_frame_info
.reg_save_ar_lc
== 0)
1667 extra_spill_size
+= 8;
1672 /* If we have an odd number of words of pretend arguments written to
1673 the stack, then the FR save area will be unaligned. We round the
1674 size of this area up to keep things 16 byte aligned. */
1676 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1678 pretend_args_size
= current_function_pretend_args_size
;
1680 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1681 + current_function_outgoing_args_size
);
1682 total_size
= IA64_STACK_ALIGN (total_size
);
1684 /* We always use the 16-byte scratch area provided by the caller, but
1685 if we are a leaf function, there's no one to which we need to provide
1687 if (current_function_is_leaf
)
1688 total_size
= MAX (0, total_size
- 16);
1690 current_frame_info
.total_size
= total_size
;
1691 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1692 current_frame_info
.spill_size
= spill_size
;
1693 current_frame_info
.extra_spill_size
= extra_spill_size
;
1694 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1695 current_frame_info
.n_spilled
= n_spilled
;
1696 current_frame_info
.initialized
= reload_completed
;
1699 /* Compute the initial difference between the specified pair of registers. */
1702 ia64_initial_elimination_offset (int from
, int to
)
1704 HOST_WIDE_INT offset
;
1706 ia64_compute_frame_size (get_frame_size ());
1709 case FRAME_POINTER_REGNUM
:
1710 if (to
== HARD_FRAME_POINTER_REGNUM
)
1712 if (current_function_is_leaf
)
1713 offset
= -current_frame_info
.total_size
;
1715 offset
= -(current_frame_info
.total_size
1716 - current_function_outgoing_args_size
- 16);
1718 else if (to
== STACK_POINTER_REGNUM
)
1720 if (current_function_is_leaf
)
1723 offset
= 16 + current_function_outgoing_args_size
;
1729 case ARG_POINTER_REGNUM
:
1730 /* Arguments start above the 16 byte save area, unless stdarg
1731 in which case we store through the 16 byte save area. */
1732 if (to
== HARD_FRAME_POINTER_REGNUM
)
1733 offset
= 16 - current_function_pretend_args_size
;
1734 else if (to
== STACK_POINTER_REGNUM
)
1735 offset
= (current_frame_info
.total_size
1736 + 16 - current_function_pretend_args_size
);
1748 /* If there are more than a trivial number of register spills, we use
1749 two interleaved iterators so that we can get two memory references
1752 In order to simplify things in the prologue and epilogue expanders,
1753 we use helper functions to fix up the memory references after the
1754 fact with the appropriate offsets to a POST_MODIFY memory mode.
1755 The following data structure tracks the state of the two iterators
1756 while insns are being emitted. */
1758 struct spill_fill_data
1760 rtx init_after
; /* point at which to emit initializations */
1761 rtx init_reg
[2]; /* initial base register */
1762 rtx iter_reg
[2]; /* the iterator registers */
1763 rtx
*prev_addr
[2]; /* address of last memory use */
1764 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
1765 HOST_WIDE_INT prev_off
[2]; /* last offset */
1766 int n_iter
; /* number of iterators in use */
1767 int next_iter
; /* next iterator to use */
1768 unsigned int save_gr_used_mask
;
1771 static struct spill_fill_data spill_fill_data
;
1774 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
1778 spill_fill_data
.init_after
= get_last_insn ();
1779 spill_fill_data
.init_reg
[0] = init_reg
;
1780 spill_fill_data
.init_reg
[1] = init_reg
;
1781 spill_fill_data
.prev_addr
[0] = NULL
;
1782 spill_fill_data
.prev_addr
[1] = NULL
;
1783 spill_fill_data
.prev_insn
[0] = NULL
;
1784 spill_fill_data
.prev_insn
[1] = NULL
;
1785 spill_fill_data
.prev_off
[0] = cfa_off
;
1786 spill_fill_data
.prev_off
[1] = cfa_off
;
1787 spill_fill_data
.next_iter
= 0;
1788 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1790 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1791 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1793 int regno
= next_scratch_gr_reg ();
1794 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1795 current_frame_info
.gr_used_mask
|= 1 << regno
;
1800 finish_spill_pointers (void)
1802 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1806 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
1808 int iter
= spill_fill_data
.next_iter
;
1809 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1810 rtx disp_rtx
= GEN_INT (disp
);
1813 if (spill_fill_data
.prev_addr
[iter
])
1815 if (CONST_OK_FOR_N (disp
))
1817 *spill_fill_data
.prev_addr
[iter
]
1818 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1819 gen_rtx_PLUS (DImode
,
1820 spill_fill_data
.iter_reg
[iter
],
1822 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
1823 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
1824 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
1828 /* ??? Could use register post_modify for loads. */
1829 if (! CONST_OK_FOR_I (disp
))
1831 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1832 emit_move_insn (tmp
, disp_rtx
);
1835 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1836 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
1839 /* Micro-optimization: if we've created a frame pointer, it's at
1840 CFA 0, which may allow the real iterator to be initialized lower,
1841 slightly increasing parallelism. Also, if there are few saves
1842 it may eliminate the iterator entirely. */
1844 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1845 && frame_pointer_needed
)
1847 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1848 set_mem_alias_set (mem
, get_varargs_alias_set ());
1856 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1857 spill_fill_data
.init_reg
[iter
]);
1862 if (! CONST_OK_FOR_I (disp
))
1864 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1865 emit_move_insn (tmp
, disp_rtx
);
1869 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1870 spill_fill_data
.init_reg
[iter
],
1877 /* Careful for being the first insn in a sequence. */
1878 if (spill_fill_data
.init_after
)
1879 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
1882 rtx first
= get_insns ();
1884 insn
= emit_insn_before (seq
, first
);
1886 insn
= emit_insn (seq
);
1888 spill_fill_data
.init_after
= insn
;
1890 /* If DISP is 0, we may or may not have a further adjustment
1891 afterward. If we do, then the load/store insn may be modified
1892 to be a post-modify. If we don't, then this copy may be
1893 eliminated by copyprop_hardreg_forward, which makes this
1894 insn garbage, which runs afoul of the sanity check in
1895 propagate_one_insn. So mark this insn as legal to delete. */
1897 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1901 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
1903 /* ??? Not all of the spills are for varargs, but some of them are.
1904 The rest of the spills belong in an alias set of their own. But
1905 it doesn't actually hurt to include them here. */
1906 set_mem_alias_set (mem
, get_varargs_alias_set ());
1908 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1909 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1911 if (++iter
>= spill_fill_data
.n_iter
)
1913 spill_fill_data
.next_iter
= iter
;
1919 do_spill (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
,
1922 int iter
= spill_fill_data
.next_iter
;
1925 mem
= spill_restore_mem (reg
, cfa_off
);
1926 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
1927 spill_fill_data
.prev_insn
[iter
] = insn
;
1934 RTX_FRAME_RELATED_P (insn
) = 1;
1936 /* Don't even pretend that the unwind code can intuit its way
1937 through a pair of interleaved post_modify iterators. Just
1938 provide the correct answer. */
1940 if (frame_pointer_needed
)
1942 base
= hard_frame_pointer_rtx
;
1947 base
= stack_pointer_rtx
;
1948 off
= current_frame_info
.total_size
- cfa_off
;
1952 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1953 gen_rtx_SET (VOIDmode
,
1954 gen_rtx_MEM (GET_MODE (reg
),
1955 plus_constant (base
, off
)),
1962 do_restore (rtx (*move_fn
) (rtx
, rtx
, rtx
), rtx reg
, HOST_WIDE_INT cfa_off
)
1964 int iter
= spill_fill_data
.next_iter
;
1967 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
1968 GEN_INT (cfa_off
)));
1969 spill_fill_data
.prev_insn
[iter
] = insn
;
1972 /* Wrapper functions that discards the CONST_INT spill offset. These
1973 exist so that we can give gr_spill/gr_fill the offset they need and
1974 use a consistent function interface. */
1977 gen_movdi_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
1979 return gen_movdi (dest
, src
);
1983 gen_fr_spill_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
1985 return gen_fr_spill (dest
, src
);
1989 gen_fr_restore_x (rtx dest
, rtx src
, rtx offset ATTRIBUTE_UNUSED
)
1991 return gen_fr_restore (dest
, src
);
1994 /* Called after register allocation to add any instructions needed for the
1995 prologue. Using a prologue insn is favored compared to putting all of the
1996 instructions in output_function_prologue(), since it allows the scheduler
1997 to intermix instructions with the saves of the caller saved registers. In
1998 some cases, it might be necessary to emit a barrier instruction as the last
1999 insn to prevent such scheduling.
2001 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2002 so that the debug info generation code can handle them properly.
2004 The register save area is layed out like so:
2006 [ varargs spill area ]
2007 [ fr register spill area ]
2008 [ br register spill area ]
2009 [ ar register spill area ]
2010 [ pr register spill area ]
2011 [ gr register spill area ] */
2013 /* ??? Get inefficient code when the frame size is larger than can fit in an
2014 adds instruction. */
2017 ia64_expand_prologue (void)
2019 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2020 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2023 ia64_compute_frame_size (get_frame_size ());
2024 last_scratch_gr_reg
= 15;
2026 /* If there is no epilogue, then we don't need some prologue insns.
2027 We need to avoid emitting the dead prologue insns, because flow
2028 will complain about them. */
2033 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
2035 if ((e
->flags
& EDGE_FAKE
) == 0
2036 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2039 epilogue_p
= (e
!= NULL
);
2044 /* Set the local, input, and output register names. We need to do this
2045 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2046 half. If we use in/loc/out register names, then we get assembler errors
2047 in crtn.S because there is no alloc insn or regstk directive in there. */
2048 if (! TARGET_REG_NAMES
)
2050 int inputs
= current_frame_info
.n_input_regs
;
2051 int locals
= current_frame_info
.n_local_regs
;
2052 int outputs
= current_frame_info
.n_output_regs
;
2054 for (i
= 0; i
< inputs
; i
++)
2055 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2056 for (i
= 0; i
< locals
; i
++)
2057 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2058 for (i
= 0; i
< outputs
; i
++)
2059 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2062 /* Set the frame pointer register name. The regnum is logically loc79,
2063 but of course we'll not have allocated that many locals. Rather than
2064 worrying about renumbering the existing rtxs, we adjust the name. */
2065 /* ??? This code means that we can never use one local register when
2066 there is a frame pointer. loc79 gets wasted in this case, as it is
2067 renamed to a register that will never be used. See also the try_locals
2068 code in find_gr_spill. */
2069 if (current_frame_info
.reg_fp
)
2071 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2072 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2073 = reg_names
[current_frame_info
.reg_fp
];
2074 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2077 /* We don't need an alloc instruction if we've used no outputs or locals. */
2078 if (current_frame_info
.n_local_regs
== 0
2079 && current_frame_info
.n_output_regs
== 0
2080 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
2081 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2083 /* If there is no alloc, but there are input registers used, then we
2084 need a .regstk directive. */
2085 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2086 ar_pfs_save_reg
= NULL_RTX
;
2090 current_frame_info
.need_regstk
= 0;
2092 if (current_frame_info
.reg_save_ar_pfs
)
2093 regno
= current_frame_info
.reg_save_ar_pfs
;
2095 regno
= next_scratch_gr_reg ();
2096 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2098 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2099 GEN_INT (current_frame_info
.n_input_regs
),
2100 GEN_INT (current_frame_info
.n_local_regs
),
2101 GEN_INT (current_frame_info
.n_output_regs
),
2102 GEN_INT (current_frame_info
.n_rotate_regs
)));
2103 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2106 /* Set up frame pointer, stack pointer, and spill iterators. */
2108 n_varargs
= cfun
->machine
->n_varargs
;
2109 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2110 stack_pointer_rtx
, 0);
2112 if (frame_pointer_needed
)
2114 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2115 RTX_FRAME_RELATED_P (insn
) = 1;
2118 if (current_frame_info
.total_size
!= 0)
2120 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2123 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2124 offset
= frame_size_rtx
;
2127 regno
= next_scratch_gr_reg ();
2128 offset
= gen_rtx_REG (DImode
, regno
);
2129 emit_move_insn (offset
, frame_size_rtx
);
2132 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2133 stack_pointer_rtx
, offset
));
2135 if (! frame_pointer_needed
)
2137 RTX_FRAME_RELATED_P (insn
) = 1;
2138 if (GET_CODE (offset
) != CONST_INT
)
2141 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2142 gen_rtx_SET (VOIDmode
,
2144 gen_rtx_PLUS (DImode
,
2151 /* ??? At this point we must generate a magic insn that appears to
2152 modify the stack pointer, the frame pointer, and all spill
2153 iterators. This would allow the most scheduling freedom. For
2154 now, just hard stop. */
2155 emit_insn (gen_blockage ());
2158 /* Must copy out ar.unat before doing any integer spills. */
2159 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2161 if (current_frame_info
.reg_save_ar_unat
)
2163 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2166 alt_regno
= next_scratch_gr_reg ();
2167 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2168 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2171 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2172 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2173 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2175 /* Even if we're not going to generate an epilogue, we still
2176 need to save the register so that EH works. */
2177 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2178 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2181 ar_unat_save_reg
= NULL_RTX
;
2183 /* Spill all varargs registers. Do this before spilling any GR registers,
2184 since we want the UNAT bits for the GR registers to override the UNAT
2185 bits from varargs, which we don't care about. */
2188 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2190 reg
= gen_rtx_REG (DImode
, regno
);
2191 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2194 /* Locate the bottom of the register save area. */
2195 cfa_off
= (current_frame_info
.spill_cfa_off
2196 + current_frame_info
.spill_size
2197 + current_frame_info
.extra_spill_size
);
2199 /* Save the predicate register block either in a register or in memory. */
2200 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2202 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2203 if (current_frame_info
.reg_save_pr
!= 0)
2205 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2206 insn
= emit_move_insn (alt_reg
, reg
);
2208 /* ??? Denote pr spill/fill by a DImode move that modifies all
2209 64 hard registers. */
2210 RTX_FRAME_RELATED_P (insn
) = 1;
2212 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2213 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2216 /* Even if we're not going to generate an epilogue, we still
2217 need to save the register so that EH works. */
2219 emit_insn (gen_prologue_use (alt_reg
));
2223 alt_regno
= next_scratch_gr_reg ();
2224 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2225 insn
= emit_move_insn (alt_reg
, reg
);
2226 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2231 /* Handle AR regs in numerical order. All of them get special handling. */
2232 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2233 && current_frame_info
.reg_save_ar_unat
== 0)
2235 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2236 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2240 /* The alloc insn already copied ar.pfs into a general register. The
2241 only thing we have to do now is copy that register to a stack slot
2242 if we'd not allocated a local register for the job. */
2243 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
2244 && current_frame_info
.reg_save_ar_pfs
== 0)
2246 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2247 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2251 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2253 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2254 if (current_frame_info
.reg_save_ar_lc
!= 0)
2256 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2257 insn
= emit_move_insn (alt_reg
, reg
);
2258 RTX_FRAME_RELATED_P (insn
) = 1;
2260 /* Even if we're not going to generate an epilogue, we still
2261 need to save the register so that EH works. */
2263 emit_insn (gen_prologue_use (alt_reg
));
2267 alt_regno
= next_scratch_gr_reg ();
2268 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2269 emit_move_insn (alt_reg
, reg
);
2270 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2275 if (current_frame_info
.reg_save_gp
)
2277 insn
= emit_move_insn (gen_rtx_REG (DImode
,
2278 current_frame_info
.reg_save_gp
),
2279 pic_offset_table_rtx
);
2280 /* We don't know for sure yet if this is actually needed, since
2281 we've not split the PIC call patterns. If all of the calls
2282 are indirect, and not followed by any uses of the gp, then
2283 this save is dead. Allow it to go away. */
2285 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, REG_NOTES (insn
));
2288 /* We should now be at the base of the gr/br/fr spill area. */
2289 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2290 + current_frame_info
.spill_size
))
2293 /* Spill all general registers. */
2294 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2295 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2297 reg
= gen_rtx_REG (DImode
, regno
);
2298 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2302 /* Handle BR0 specially -- it may be getting stored permanently in
2303 some GR register. */
2304 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2306 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2307 if (current_frame_info
.reg_save_b0
!= 0)
2309 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2310 insn
= emit_move_insn (alt_reg
, reg
);
2311 RTX_FRAME_RELATED_P (insn
) = 1;
2313 /* Even if we're not going to generate an epilogue, we still
2314 need to save the register so that EH works. */
2316 emit_insn (gen_prologue_use (alt_reg
));
2320 alt_regno
= next_scratch_gr_reg ();
2321 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2322 emit_move_insn (alt_reg
, reg
);
2323 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2328 /* Spill the rest of the BR registers. */
2329 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2330 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2332 alt_regno
= next_scratch_gr_reg ();
2333 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2334 reg
= gen_rtx_REG (DImode
, regno
);
2335 emit_move_insn (alt_reg
, reg
);
2336 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2340 /* Align the frame and spill all FR registers. */
2341 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2342 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2346 reg
= gen_rtx_REG (XFmode
, regno
);
2347 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2351 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2354 finish_spill_pointers ();
/* NOTE(review): this region is an extraction-mangled copy of GCC's
   config/ia64/ia64.c.  Statements are split across physical lines, the
   original file's line numbers are embedded in the text, and gaps in that
   numbering show that lines (braces, else arms, some statements) were
   dropped.  Do not compile as-is; restore from pristine upstream source.
   Documentation below describes what the surviving tokens show.

   ia64_expand_epilogue: emits RTL to restore predicate, application
   (ar.unat/ar.pfs/ar.lc), general, branch, and FP registers from their
   save slots or save registers, deallocates the frame, and — unless
   SIBCALL_P — emits the return jump through b0.  For a sibcall it instead
   emits an alloc to preserve the input registers. */
2357 /* Called after register allocation to add any instructions needed for the
2358 epilogue. Using an epilogue insn is favored compared to putting all of the
2359 instructions in output_function_prologue(), since it allows the scheduler
2360 to intermix instructions with the saves of the caller saved registers. In
2361 some cases, it might be necessary to emit a barrier instruction as the last
2362 insn to prevent such scheduling. */
2365 ia64_expand_epilogue (int sibcall_p
)
2367 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2368 int regno
, alt_regno
, cfa_off
;
2370 ia64_compute_frame_size (get_frame_size ());
2372 /* If there is a frame pointer, then we use it instead of the stack
2373 pointer, so that the stack pointer does not need to be valid when
2374 the epilogue starts. See EXIT_IGNORE_STACK. */
2375 if (frame_pointer_needed
)
2376 setup_spill_pointers (current_frame_info
.n_spilled
,
2377 hard_frame_pointer_rtx
, 0);
/* NOTE(review): an `else` presumably preceded this call in the original
   (line 2378 is missing) — TODO confirm against upstream. */
2379 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2380 current_frame_info
.total_size
);
2382 if (current_frame_info
.total_size
!= 0)
2384 /* ??? At this point we must generate a magic insn that appears to
2385 modify the spill iterators and the frame pointer. This would
2386 allow the most scheduling freedom. For now, just hard stop. */
2387 emit_insn (gen_blockage ());
2390 /* Locate the bottom of the register save area. */
2391 cfa_off
= (current_frame_info
.spill_cfa_off
2392 + current_frame_info
.spill_size
2393 + current_frame_info
.extra_spill_size
);
2395 /* Restore the predicate registers. */
2396 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2398 if (current_frame_info
.reg_save_pr
!= 0)
2399 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
/* NOTE(review): the else-branch (reload from stack via a scratch GR)
   follows; its `else` keyword line was dropped in extraction. */
2402 alt_regno
= next_scratch_gr_reg ();
2403 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2404 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2407 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2408 emit_move_insn (reg
, alt_reg
);
2411 /* Restore the application registers. */
2413 /* Load the saved unat from the stack, but do not restore it until
2414 after the GRs have been restored. */
2415 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2417 if (current_frame_info
.reg_save_ar_unat
!= 0)
/* NOTE(review): the assignment target (presumably ar_unat_save_reg) was
   on the dropped line 2418 — TODO confirm against upstream. */
2419 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2422 alt_regno
= next_scratch_gr_reg ();
2423 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2424 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2425 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2430 ar_unat_save_reg
= NULL_RTX
;
2432 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2434 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2435 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2436 emit_move_insn (reg
, alt_reg
);
2438 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2440 alt_regno
= next_scratch_gr_reg ();
2441 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2442 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2444 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2445 emit_move_insn (reg
, alt_reg
);
2448 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2450 if (current_frame_info
.reg_save_ar_lc
!= 0)
2451 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2454 alt_regno
= next_scratch_gr_reg ();
2455 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2456 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2459 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2460 emit_move_insn (reg
, alt_reg
);
2463 /* We should now be at the base of the gr/br/fr spill area. */
2464 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2465 + current_frame_info
.spill_size
))
/* NOTE(review): the consequent (an abort in upstream) was dropped. */
2468 /* The GP may be stored on the stack in the prologue, but it's
2469 never restored in the epilogue. Skip the stack slot. */
2470 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
2473 /* Restore all general registers. */
2474 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
2475 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2477 reg
= gen_rtx_REG (DImode
, regno
);
2478 do_restore (gen_gr_restore
, reg
, cfa_off
);
2482 /* Restore the branch registers. Handle B0 specially, as it may
2483 have gotten stored in some GR register. */
2484 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2486 if (current_frame_info
.reg_save_b0
!= 0)
2487 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2490 alt_regno
= next_scratch_gr_reg ();
2491 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2492 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2495 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2496 emit_move_insn (reg
, alt_reg
);
2499 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2500 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2502 alt_regno
= next_scratch_gr_reg ();
2503 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2504 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2506 reg
= gen_rtx_REG (DImode
, regno
);
2507 emit_move_insn (reg
, alt_reg
);
2510 /* Restore floating point registers. */
2511 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2512 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2516 reg
= gen_rtx_REG (XFmode
, regno
);
2517 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2521 /* Restore ar.unat for real. */
2522 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2524 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2525 emit_move_insn (reg
, ar_unat_save_reg
);
2528 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2531 finish_spill_pointers ();
2533 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2535 /* ??? At this point we must generate a magic insn that appears to
2536 modify the spill iterators, the stack pointer, and the frame
2537 pointer. This would allow the most scheduling freedom. For now,
2539 emit_insn (gen_blockage ());
2542 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2543 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2544 else if (frame_pointer_needed
)
2546 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2547 RTX_FRAME_RELATED_P (insn
) = 1;
2549 else if (current_frame_info
.total_size
)
2551 rtx offset
, frame_size_rtx
;
2553 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2554 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2555 offset
= frame_size_rtx
;
2558 regno
= next_scratch_gr_reg ();
2559 offset
= gen_rtx_REG (DImode
, regno
);
2560 emit_move_insn (offset
, frame_size_rtx
);
2563 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
/* NOTE(review): the `offset));` continuation of the adddi3 call was on a
   dropped line — TODO confirm against upstream. */
2566 RTX_FRAME_RELATED_P (insn
) = 1;
2567 if (GET_CODE (offset
) != CONST_INT
)
2570 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2571 gen_rtx_SET (VOIDmode
,
2573 gen_rtx_PLUS (DImode
,
/* NOTE(review): the remainder of this REG_NOTES expression was dropped. */
2580 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2581 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
/* For a normal return (not a sibcall), jump through b0. */
2584 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
/* Sibcall path: pick a throw-away GR for the alloc result. */
2587 int fp
= GR_REG (2);
2588 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2589 first available call clobbered register. If there was a frame_pointer
2590 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2591 so we have to make sure we're using the string "r2" when emitting
2592 the register name for the assembler. */
2593 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
2594 fp
= HARD_FRAME_POINTER_REGNUM
;
2596 /* We must emit an alloc to force the input registers to become output
2597 registers. Otherwise, if the callee tries to pass its parameters
2598 through to another call without an intervening alloc, then these
2600 /* ??? We don't need to preserve all input registers. We only need to
2601 preserve those input registers used as arguments to the sibling call.
2602 It is unclear how to compute that number here. */
2603 if (current_frame_info
.n_input_regs
!= 0)
2604 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
2605 const0_rtx
, const0_rtx
,
2606 GEN_INT (current_frame_info
.n_input_regs
),
/* NOTE(review): extraction-mangled text (statements split across lines,
   embedded upstream line numbers, some lines dropped).  Restore from
   pristine GCC config/ia64/ia64.c before compiling.

   ia64_direct_return: predicate — nonzero when br.ret alone suffices to
   return: after reload, with no frame pointer, no frame, nothing spilled,
   and none of b0/pr/ar.pfs/ar.unat/ar.lc saved in a register. */
2611 /* Return 1 if br.ret can do all the work required to return from a
2615 ia64_direct_return (void)
2617 if (reload_completed
&& ! frame_pointer_needed
)
2619 ia64_compute_frame_size (get_frame_size ());
2621 return (current_frame_info
.total_size
== 0
2622 && current_frame_info
.n_spilled
== 0
2623 && current_frame_info
.reg_save_b0
== 0
2624 && current_frame_info
.reg_save_pr
== 0
2625 && current_frame_info
.reg_save_ar_pfs
== 0
2626 && current_frame_info
.reg_save_ar_unat
== 0
2627 && current_frame_info
.reg_save_ar_lc
== 0);
/* NOTE(review): extraction-mangled text; restore from upstream before use.

   ia64_return_addr_rtx: returns an UNSPEC_RET_ADDR placeholder rtx that
   stands in for the return address until it is split after reload (see
   ia64_split_return_addr_rtx below).  COUNT/FRAME handling for non-zero
   counts is not visible here — presumably dropped lines return NULL for
   count != 0; TODO confirm against upstream. */
2632 /* Return the magic cookie that we use to hold the return address
2633 during early compilation. */
2636 ia64_return_addr_rtx (HOST_WIDE_INT count
, rtx frame ATTRIBUTE_UNUSED
)
2640 return gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_RET_ADDR
);
/* NOTE(review): extraction-mangled text; several lines (braces, else
   keywords, a loop body) were dropped.  Restore from upstream before use.

   ia64_split_return_addr_rtx: replaces the UNSPEC_RET_ADDR cookie in DEST
   with the real return address — either the GR where b0 was saved, a load
   from b0's stack save slot (address computed from the CFA offset, via FP
   or SP), or b0 itself when it was never saved. */
2643 /* Split this value after reload, now that we know where the return
2644 address is saved. */
2647 ia64_split_return_addr_rtx (rtx dest
)
2651 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2653 if (current_frame_info
.reg_save_b0
!= 0)
2654 src
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2660 /* Compute offset from CFA for BR0. */
2661 /* ??? Must be kept in sync with ia64_expand_prologue. */
2662 off
= (current_frame_info
.spill_cfa_off
2663 + current_frame_info
.spill_size
);
2664 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2665 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
/* NOTE(review): loop body (presumably `off += 8;`) dropped — TODO confirm. */
2668 /* Convert CFA offset to a register based offset. */
2669 if (frame_pointer_needed
)
2670 src
= hard_frame_pointer_rtx
;
2673 src
= stack_pointer_rtx
;
2674 off
+= current_frame_info
.total_size
;
2677 /* Load address into scratch register. */
2678 if (CONST_OK_FOR_I (off
))
2679 emit_insn (gen_adddi3 (dest
, src
, GEN_INT (off
)));
2682 emit_move_insn (dest
, GEN_INT (off
));
2683 emit_insn (gen_adddi3 (dest
, src
, dest
));
2686 src
= gen_rtx_MEM (Pmode
, dest
);
2690 src
= gen_rtx_REG (DImode
, BR_REG (0));
2692 emit_move_insn (dest
, src
);
/* NOTE(review): extraction-mangled text; the return statements for the
   early-exit cases (return 0 / return 1) were dropped.  Restore from
   upstream before use.

   ia64_hard_regno_rename_ok: register-rename legality predicate.  Forbids
   renaming to or from any register the prologue reserved (frame pointer
   and the b0/pr/ar.pfs/ar.unat/ar.lc save registers), forbids output
   registers beyond the allocated register frame, and requires predicate
   register pairs to keep their even/odd parity. */
2696 ia64_hard_regno_rename_ok (int from
, int to
)
2698 /* Don't clobber any of the registers we reserved for the prologue. */
2699 if (to
== current_frame_info
.reg_fp
2700 || to
== current_frame_info
.reg_save_b0
2701 || to
== current_frame_info
.reg_save_pr
2702 || to
== current_frame_info
.reg_save_ar_pfs
2703 || to
== current_frame_info
.reg_save_ar_unat
2704 || to
== current_frame_info
.reg_save_ar_lc
)
2707 if (from
== current_frame_info
.reg_fp
2708 || from
== current_frame_info
.reg_save_b0
2709 || from
== current_frame_info
.reg_save_pr
2710 || from
== current_frame_info
.reg_save_ar_pfs
2711 || from
== current_frame_info
.reg_save_ar_unat
2712 || from
== current_frame_info
.reg_save_ar_lc
)
2715 /* Don't use output registers outside the register frame. */
2716 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2719 /* Retain even/oddness on predicate register pairs. */
2720 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
2721 return (from
& 1) == (to
& 1);
/* NOTE(review): extraction-mangled text; restore from upstream before use.

   ia64_assemble_integer: TARGET_ASM_INTEGER hook.  For a pointer-sized
   reference to a function symbol under PIC, emits a data4/data8 @fptr()
   relocation (function descriptor) instead of a raw address; everything
   else falls through to default_assemble_integer. */
2726 /* Target hook for assembling integer objects. Handle word-sized
2727 aligned objects and detect the cases when @fptr is needed. */
2730 ia64_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
2732 if (size
== POINTER_SIZE
/ BITS_PER_UNIT
2734 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
2735 && GET_CODE (x
) == SYMBOL_REF
2736 && SYMBOL_REF_FUNCTION_P (x
))
2738 if (POINTER_SIZE
== 32)
2739 fputs ("\tdata4\t@fptr(", asm_out_file
);
2741 fputs ("\tdata8\t@fptr(", asm_out_file
);
2742 output_addr_const (asm_out_file
, x
);
2743 fputs (")\n", asm_out_file
);
/* NOTE(review): a `return true;` for the @fptr case was presumably on a
   dropped line — TODO confirm against upstream. */
2746 return default_assemble_integer (x
, size
, aligned_p
);
/* NOTE(review): extraction-mangled text; the `mask = 0;` initialization
   and the `mask |= ...;` statements inside each if-body were dropped
   (gaps at upstream lines 2768, 2771-2774, 2778-2779, 2787-2788,
   2796-2797), so `mask` is read uninitialized as the text stands.
   Restore from upstream before use.

   ia64_output_function_prologue: emits the assembler-level prologue
   directives — `.regstk` when a register frame is needed, and (when
   unwind info is wanted) `.prologue [mask, grsave]` describing which of
   b0/ar.pfs/fp/pr are saved in consecutive GRs, plus a `.spill` directive
   when the spill area base is not at the default CFA-16. */
2749 /* Emit the function prologue. */
2752 ia64_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
2754 int mask
, grsave
, grsave_prev
;
2756 if (current_frame_info
.need_regstk
)
2757 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
2758 current_frame_info
.n_input_regs
,
2759 current_frame_info
.n_local_regs
,
2760 current_frame_info
.n_output_regs
,
2761 current_frame_info
.n_rotate_regs
);
2763 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
/* NOTE(review): early `return;` presumably dropped here. */
2766 /* Emit the .prologue directive. */
2769 grsave
= grsave_prev
= 0;
2770 if (current_frame_info
.reg_save_b0
!= 0)
2773 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
2775 if (current_frame_info
.reg_save_ar_pfs
!= 0
2776 && (grsave_prev
== 0
2777 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
2780 if (grsave_prev
== 0)
2781 grsave
= current_frame_info
.reg_save_ar_pfs
;
2782 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
2784 if (current_frame_info
.reg_fp
!= 0
2785 && (grsave_prev
== 0
2786 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
2789 if (grsave_prev
== 0)
2790 grsave
= HARD_FRAME_POINTER_REGNUM
;
2791 grsave_prev
= current_frame_info
.reg_fp
;
2793 if (current_frame_info
.reg_save_pr
!= 0
2794 && (grsave_prev
== 0
2795 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
2798 if (grsave_prev
== 0)
2799 grsave
= current_frame_info
.reg_save_pr
;
2802 if (mask
&& TARGET_GNU_AS
)
2803 fprintf (file
, "\t.prologue %d, %d\n", mask
,
2804 ia64_dbx_register_number (grsave
));
2806 fputs ("\t.prologue\n", file
);
2808 /* Emit a .spill directive, if necessary, to relocate the base of
2809 the register spill area. */
2810 if (current_frame_info
.spill_cfa_off
!= -16)
2811 fprintf (file
, "\t.spill %ld\n",
2812 (long) (current_frame_info
.spill_cfa_off
2813 + current_frame_info
.spill_size
));
/* NOTE(review): extraction-mangled text; restore from upstream before use.

   ia64_output_function_end_prologue: emits the `.body` unwind directive at
   the scheduled end of the prologue, but only when unwind tables or
   non-SJLJ exceptions are enabled (otherwise the dropped early return at
   upstream ~2822 fires first — TODO confirm). */
2816 /* Emit the .body directive at the scheduled end of the prologue. */
2819 ia64_output_function_end_prologue (FILE *file
)
2821 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
2824 fputs ("\t.body\n", file
);
/* NOTE(review): extraction-mangled text (declaration of `i` and the
   enclosing braces were dropped).  Restore from upstream before use.

   ia64_output_function_epilogue: undoes the per-function register-name
   games — swaps back the reg_names entries for the hard frame pointer and
   the GR chosen as frame pointer, restores the in/loc/out alias names when
   -mno-register-names style output is in effect, and marks
   current_frame_info as stale for the next function. */
2827 /* Emit the function epilogue. */
2830 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
2831 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
2835 if (current_frame_info
.reg_fp
)
2837 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2838 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2839 = reg_names
[current_frame_info
.reg_fp
];
2840 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2842 if (! TARGET_REG_NAMES
)
2844 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
2845 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
2846 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
2847 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
2848 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
2849 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
2852 current_frame_info
.initialized
= 0;
/* NOTE(review): extraction-mangled text; the return type line and the
   final `else return regno;` fallthrough were dropped.  Restore from
   upstream before use.

   ia64_dbx_register_number: maps a hard register number to its debug-info
   register number.  Undoes the prologue's frame-pointer renaming, then
   renumbers stacked registers: inputs start at 32, followed by locals,
   followed by outputs, in the virtual in/loc/out layout. */
2856 ia64_dbx_register_number (int regno
)
2858 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2859 from its home at loc79 to something inside the register frame. We
2860 must perform the same renumbering here for the debug info. */
2861 if (current_frame_info
.reg_fp
)
2863 if (regno
== HARD_FRAME_POINTER_REGNUM
)
2864 regno
= current_frame_info
.reg_fp
;
2865 else if (regno
== current_frame_info
.reg_fp
)
2866 regno
= HARD_FRAME_POINTER_REGNUM
;
2869 if (IN_REGNO_P (regno
))
2870 return 32 + regno
- IN_REG (0);
2871 else if (LOC_REGNO_P (regno
))
2872 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
2873 else if (OUT_REGNO_P (regno
))
2874 return (32 + current_frame_info
.n_input_regs
2875 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
/* NOTE(review): extraction-mangled text; the return type line, braces,
   and the TARGET_GNU_AS/ILP32 conditionals guarding the globalize-label
   block were dropped.  Restore from upstream before use.

   ia64_initialize_trampoline: emits RTL that fills in a 4-word trampoline
   at ADDR: word 0 = __ia64_trampoline (the fake function-descriptor entry
   point), word 1 = ADDR+16, word 2 = the target's descriptor FNADDR,
   word 3 = STATIC_CHAIN.  Also emits a one-time global declaration of
   __ia64_trampoline for assemblers that require it. */
2881 ia64_initialize_trampoline (rtx addr
, rtx fnaddr
, rtx static_chain
)
2883 rtx addr_reg
, eight
= GEN_INT (8);
2885 /* The Intel assembler requires that the global __ia64_trampoline symbol
2886 be declared explicitly */
2889 static bool declared_ia64_trampoline
= false;
2891 if (!declared_ia64_trampoline
)
2893 declared_ia64_trampoline
= true;
2894 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
2895 "__ia64_trampoline");
2899 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2900 addr
= convert_memory_address (Pmode
, addr
);
2901 fnaddr
= convert_memory_address (Pmode
, fnaddr
);
2902 static_chain
= convert_memory_address (Pmode
, static_chain
);
2904 /* Load up our iterator. */
2905 addr_reg
= gen_reg_rtx (Pmode
);
2906 emit_move_insn (addr_reg
, addr
);
2908 /* The first two words are the fake descriptor:
2909 __ia64_trampoline, ADDR+16. */
2910 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2911 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
2912 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2914 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
2915 copy_to_reg (plus_constant (addr
, 16)));
2916 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2918 /* The third word is the target descriptor. */
2919 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
2920 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
2922 /* The fourth word is the static chain. */
2923 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
/* NOTE(review): extraction-mangled text; restore from upstream before use.

   ia64_setup_incoming_varargs: TARGET_SETUP_INCOMING_VARARGS hook.
   Advances a copy of CUM past the last named argument; if any of the 8
   argument slots remain, records the count in cfun->machine->n_varargs
   (consumed by the prologue, which emits the actual spills) and reports
   the pretend-args size in bytes via *PRETEND_SIZE. */
2926 /* Do any needed setup for a variadic function. CUM has not been updated
2927 for the last named argument which has type TYPE and mode MODE.
2929 We generate the actual spill instructions during prologue generation. */
2932 ia64_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
2933 tree type
, int * pretend_size
,
2934 int second_time ATTRIBUTE_UNUSED
)
2936 CUMULATIVE_ARGS next_cum
= *cum
;
2938 /* Skip the current argument. */
2939 ia64_function_arg_advance (&next_cum
, mode
, type
, 1);
2941 if (next_cum
.words
< MAX_ARGUMENT_SLOTS
)
2943 int n
= MAX_ARGUMENT_SLOTS
- next_cum
.words
;
2944 *pretend_size
= n
* UNITS_PER_WORD
;
2945 cfun
->machine
->n_varargs
= n
;
/* NOTE(review): extraction-mangled text; the `switch (code)` line, several
   case labels (REAL_TYPE, COMPLEX_TYPE, ARRAY_TYPE, RECORD_TYPE,
   UNION_TYPE), `return VOIDmode;` exits, `continue`/`break` statements,
   and the declaration of `t` were all dropped.  Restore from upstream
   before use.

   hfa_element_mode: classifies TYPE for the IA-64 homogeneous floating
   point aggregate (HFA) parameter-passing rules.  Returns the common
   floating mode of all leaves when TYPE is an HFA, VOIDmode otherwise;
   quad-precision (TFmode/TCmode) is excluded.  NESTED distinguishes a
   bare scalar from a scalar found inside an aggregate. */
2949 /* Check whether TYPE is a homogeneous floating point aggregate. If
2950 it is, return the mode of the floating point type that appears
2951 in all leafs. If it is not, return VOIDmode.
2953 An aggregate is a homogeneous floating point aggregate is if all
2954 fields/elements in it have the same floating point type (e.g,
2955 SFmode). 128-bit quad-precision floats are excluded. */
2957 static enum machine_mode
2958 hfa_element_mode (tree type
, int nested
)
2960 enum machine_mode element_mode
= VOIDmode
;
2961 enum machine_mode mode
;
2962 enum tree_code code
= TREE_CODE (type
);
2963 int know_element_mode
= 0;
2968 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
2969 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
2970 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
2971 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
2975 /* Fortran complex types are supposed to be HFAs, so we need to handle
2976 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2979 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
2980 && TYPE_MODE (type
) != TCmode
)
2981 return GET_MODE_INNER (TYPE_MODE (type
));
2986 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2987 mode if this is contained within an aggregate. */
2988 if (nested
&& TYPE_MODE (type
) != TFmode
)
2989 return TYPE_MODE (type
);
2994 return hfa_element_mode (TREE_TYPE (type
), 1);
2998 case QUAL_UNION_TYPE
:
2999 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3001 if (TREE_CODE (t
) != FIELD_DECL
)
3004 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3005 if (know_element_mode
)
3007 if (mode
!= element_mode
)
3010 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3014 know_element_mode
= 1;
3015 element_mode
= mode
;
3018 return element_mode
;
3021 /* If we reach here, we probably have some front-end specific type
3022 that the backend doesn't know about. This can happen via the
3023 aggregate_value_p call in init_function_start. All we can do is
3024 ignore unknown tree types. */
/* NOTE(review): extraction-mangled text; the return type, opening brace,
   `int words;` declaration and the `else` keyword were dropped.  Restore
   from upstream before use.

   ia64_function_arg_words: byte size of the argument (int_size_in_bytes
   for BLKmode, GET_MODE_SIZE otherwise) rounded up to 8-byte words. */
3031 /* Return the number of words required to hold a quantity of TYPE and MODE
3032 when passed as an argument. */
3034 ia64_function_arg_words (tree type
, enum machine_mode mode
)
3038 if (mode
== BLKmode
)
3039 words
= int_size_in_bytes (type
);
3041 words
= GET_MODE_SIZE (mode
);
3043 return (words
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
; /* round up */
/* NOTE(review): extraction-mangled text; the early `return 0;`, the
   `if (words > 1)` / `if (type)` guards (upstream ~3063-3065) and the
   final else-branch returning `words > 1` were dropped.  Restore from
   upstream before use.

   ia64_function_arg_offset: number of argument slots to skip (0 or 1) so
   the argument lands on the even-slot boundary the IA-64 ABI requires for
   16-byte-aligned aggregates and >8-byte scalars. */
3046 /* Return the number of registers that should be skipped so the current
3047 argument (described by TYPE and WORDS) will be properly aligned.
3049 Integer and float arguments larger than 8 bytes start at the next
3050 even boundary. Aggregates larger than 8 bytes start at the next
3051 even boundary if the aggregate has 16 byte alignment. Note that
3052 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3053 but are still to be aligned in registers.
3055 ??? The ABI does not specify how to handle aggregates with
3056 alignment from 9 to 15 bytes, or greater than 16. We handle them
3057 all as if they had 16 byte alignment. Such aggregates can occur
3058 only if gcc extensions are used. */
3060 ia64_function_arg_offset (CUMULATIVE_ARGS
*cum
, tree type
, int words
)
3062 if ((cum
->words
& 1) == 0)
3066 && TREE_CODE (type
) != INTEGER_TYPE
3067 && TREE_CODE (type
) != REAL_TYPE
)
3068 return TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
;
/* NOTE(review): this region is an extraction-mangled copy of GCC's
   config/ia64/ia64.c; many lines were dropped (return-0-on-stack, the
   `loc`/`i`/`byte_size`/`args_byte_size` declarations, register-number
   arithmetic inside gen_rtx_REG calls, const_int offsets in EXPR_LISTs,
   `offset += ...` loop steps, several else/brace lines).  Restore from
   pristine upstream source before compiling.

   ia64_function_arg: FUNCTION_ARG implementation.  Chooses where the
   current argument lives: a PARALLEL of FR (HFA element mode) plus DImode
   GR pieces for homogeneous FP aggregates; GRs for integers, aggregates,
   quad-precision, and FP overflow once the 8 FR argument slots are full
   (with big-endian small-aggregate padding handled via a PARALLEL); an FR
   for named prototyped FP scalars; and both FR and GR for unprototyped FP
   scalars.  BASEREG selects incoming (GR_ARG_FIRST) vs outgoing
   (AR_ARG_FIRST) numbering. */
3073 /* Return rtx for register where argument is passed, or zero if it is passed
3075 /* ??? 128-bit quad-precision floats are always passed in general
3079 ia64_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
, tree type
,
3080 int named
, int incoming
)
3082 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
3083 int words
= ia64_function_arg_words (type
, mode
);
3084 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3085 enum machine_mode hfa_mode
= VOIDmode
;
3087 /* If all argument slots are used, then it must go on the stack. */
3088 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3091 /* Check for and handle homogeneous FP aggregates. */
3093 hfa_mode
= hfa_element_mode (type
, 0);
3095 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3096 and unprototyped hfas are passed specially. */
3097 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3101 int fp_regs
= cum
->fp_regs
;
3102 int int_regs
= cum
->words
+ offset
;
3103 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3107 /* If prototyped, pass it in FR regs then GR regs.
3108 If not prototyped, pass it in both FR and GR regs.
3110 If this is an SFmode aggregate, then it is possible to run out of
3111 FR regs while GR regs are still left. In that case, we pass the
3112 remaining part in the GR regs. */
3114 /* Fill the FP regs. We do this always. We stop if we reach the end
3115 of the argument, the last FP register, or the last argument slot. */
3117 byte_size
= ((mode
== BLKmode
)
3118 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3119 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3121 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3122 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
3124 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3125 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
3129 args_byte_size
+= hfa_size
;
3133 /* If no prototype, then the whole thing must go in GR regs. */
3134 if (! cum
->prototype
)
3136 /* If this is an SFmode aggregate, then we might have some left over
3137 that needs to go in GR regs. */
3138 else if (byte_size
!= offset
)
3139 int_regs
+= offset
/ UNITS_PER_WORD
;
3141 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3143 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3145 enum machine_mode gr_mode
= DImode
;
3146 unsigned int gr_size
;
3148 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3149 then this goes in a GR reg left adjusted/little endian, right
3150 adjusted/big endian. */
3151 /* ??? Currently this is handled wrong, because 4-byte hunks are
3152 always right adjusted/little endian. */
3155 /* If we have an even 4 byte hunk because the aggregate is a
3156 multiple of 4 bytes in size, then this goes in a GR reg right
3157 adjusted/little endian. */
3158 else if (byte_size
- offset
== 4)
3161 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3162 gen_rtx_REG (gr_mode
, (basereg
3166 gr_size
= GET_MODE_SIZE (gr_mode
);
3168 if (gr_size
== UNITS_PER_WORD
3169 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
3171 else if (gr_size
> UNITS_PER_WORD
)
3172 int_regs
+= gr_size
/ UNITS_PER_WORD
;
3174 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3177 /* Integral and aggregates go in general registers. If we have run out of
3178 FR registers, then FP values must also go in general registers. This can
3179 happen when we have a SFmode HFA. */
3180 else if (mode
== TFmode
|| mode
== TCmode
3181 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3183 int byte_size
= ((mode
== BLKmode
)
3184 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3185 if (BYTES_BIG_ENDIAN
3186 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3187 && byte_size
< UNITS_PER_WORD
3190 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3191 gen_rtx_REG (DImode
,
3192 (basereg
+ cum
->words
3195 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3198 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3202 /* If there is a prototype, then FP values go in a FR register when
3203 named, and in a GR register when unnamed. */
3204 else if (cum
->prototype
)
3207 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3208 /* In big-endian mode, an anonymous SFmode value must be represented
3209 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3210 the value into the high half of the general register. */
3211 else if (BYTES_BIG_ENDIAN
&& mode
== SFmode
)
3212 return gen_rtx_PARALLEL (mode
,
3214 gen_rtx_EXPR_LIST (VOIDmode
,
3215 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
3218 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3220 /* If there is no prototype, then FP values go in both FR and GR
3224 /* See comment above. */
3225 enum machine_mode inner_mode
=
3226 (BYTES_BIG_ENDIAN
&& mode
== SFmode
) ? DImode
: mode
;
3228 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3229 gen_rtx_REG (mode
, (FR_ARG_FIRST
3232 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3233 gen_rtx_REG (inner_mode
,
3234 (basereg
+ cum
->words
3238 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
/* NOTE(review): extraction-mangled text; the return type, braces, and the
   two early `return 0;` statements were dropped.  Restore from upstream
   before use.

   ia64_function_arg_partial_nregs: how many leading words of the argument
   go in registers when it straddles the boundary of the 8 argument slots;
   0 when it is entirely in registers or entirely on the stack. */
3242 /* Return number of words, at the beginning of the argument, that must be
3243 put in registers. 0 is the argument is entirely in registers or entirely
3247 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3248 tree type
, int named ATTRIBUTE_UNUSED
)
3250 int words
= ia64_function_arg_words (type
, mode
);
3251 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3253 /* If all argument slots are used, then it must go on the stack. */
3254 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3257 /* It doesn't matter whether the argument goes in FR or GR regs. If
3258 it fits within the 8 argument slots, then it goes entirely in
3259 registers. If it extends past the last argument slot, then the rest
3260 goes on the stack. */
3262 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3265 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
3268 /* Update CUM to point after this argument. This is patterned after
3269 ia64_function_arg. */
3272 ia64_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3273 tree type
, int named
)
3275 int words
= ia64_function_arg_words (type
, mode
);
3276 int offset
= ia64_function_arg_offset (cum
, type
, words
);
3277 enum machine_mode hfa_mode
= VOIDmode
;
3279 /* If all arg slots are already full, then there is nothing to do. */
3280 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3283 cum
->words
+= words
+ offset
;
3285 /* Check for and handle homogeneous FP aggregates. */
3287 hfa_mode
= hfa_element_mode (type
, 0);
3289 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3290 and unprototyped hfas are passed specially. */
3291 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3293 int fp_regs
= cum
->fp_regs
;
3294 /* This is the original value of cum->words + offset. */
3295 int int_regs
= cum
->words
- words
;
3296 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3300 /* If prototyped, pass it in FR regs then GR regs.
3301 If not prototyped, pass it in both FR and GR regs.
3303 If this is an SFmode aggregate, then it is possible to run out of
3304 FR regs while GR regs are still left. In that case, we pass the
3305 remaining part in the GR regs. */
3307 /* Fill the FP regs. We do this always. We stop if we reach the end
3308 of the argument, the last FP register, or the last argument slot. */
3310 byte_size
= ((mode
== BLKmode
)
3311 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3312 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3314 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3315 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3318 args_byte_size
+= hfa_size
;
3322 cum
->fp_regs
= fp_regs
;
3325 /* Integral and aggregates go in general registers. If we have run out of
3326 FR registers, then FP values must also go in general registers. This can
3327 happen when we have a SFmode HFA. */
3328 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3329 cum
->int_regs
= cum
->words
;
3331 /* If there is a prototype, then FP values go in a FR register when
3332 named, and in a GR register when unnamed. */
3333 else if (cum
->prototype
)
3336 cum
->int_regs
= cum
->words
;
3338 /* ??? Complex types should not reach here. */
3339 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3341 /* If there is no prototype, then FP values go in both FR and GR
3345 /* ??? Complex types should not reach here. */
3346 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3347 cum
->int_regs
= cum
->words
;
3351 /* Variable sized types are passed by reference. */
3352 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3355 ia64_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3356 enum machine_mode mode ATTRIBUTE_UNUSED
,
3357 tree type
, bool named ATTRIBUTE_UNUSED
)
3359 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3362 /* True if it is OK to do sibling call optimization for the specified
3363 call expression EXP. DECL will be the called function, or NULL if
3364 this is an indirect call. */
3366 ia64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
3368 /* We must always return with our current GP. This means we can
3369 only sibcall to functions defined in the current module. */
3370 return decl
&& (*targetm
.binds_local_p
) (decl
);
3374 /* Implement va_arg. */
3377 ia64_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3379 /* Variable sized types are passed by reference. */
3380 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
3382 tree ptrtype
= build_pointer_type (type
);
3383 tree addr
= std_gimplify_va_arg_expr (valist
, ptrtype
, pre_p
, post_p
);
3384 return build_va_arg_indirect_ref (addr
);
3387 /* Aggregate arguments with alignment larger than 8 bytes start at
3388 the next even boundary. Integer and floating point arguments
3389 do so if they are larger than 8 bytes, whether or not they are
3390 also aligned larger than 8 bytes. */
3391 if ((TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == INTEGER_TYPE
)
3392 ? int_size_in_bytes (type
) > 8 : TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3394 tree t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3395 build_int_cst (NULL_TREE
, 2 * UNITS_PER_WORD
- 1));
3396 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3397 build_int_cst (NULL_TREE
, -2 * UNITS_PER_WORD
));
3398 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3399 gimplify_and_add (t
, pre_p
);
3402 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3405 /* Return 1 if function return value returned in memory. Return 0 if it is
3409 ia64_return_in_memory (tree valtype
, tree fntype ATTRIBUTE_UNUSED
)
3411 enum machine_mode mode
;
3412 enum machine_mode hfa_mode
;
3413 HOST_WIDE_INT byte_size
;
3415 mode
= TYPE_MODE (valtype
);
3416 byte_size
= GET_MODE_SIZE (mode
);
3417 if (mode
== BLKmode
)
3419 byte_size
= int_size_in_bytes (valtype
);
3424 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3426 hfa_mode
= hfa_element_mode (valtype
, 0);
3427 if (hfa_mode
!= VOIDmode
)
3429 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3431 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3436 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3442 /* Return rtx for register that holds the function return value. */
3445 ia64_function_value (tree valtype
, tree func ATTRIBUTE_UNUSED
)
3447 enum machine_mode mode
;
3448 enum machine_mode hfa_mode
;
3450 mode
= TYPE_MODE (valtype
);
3451 hfa_mode
= hfa_element_mode (valtype
, 0);
3453 if (hfa_mode
!= VOIDmode
)
3461 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3462 byte_size
= ((mode
== BLKmode
)
3463 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3465 for (i
= 0; offset
< byte_size
; i
++)
3467 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3468 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3472 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3474 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
3475 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3478 if (BYTES_BIG_ENDIAN
3479 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
3487 bytesize
= int_size_in_bytes (valtype
);
3488 for (i
= 0; offset
< bytesize
; i
++)
3490 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3491 gen_rtx_REG (DImode
,
3494 offset
+= UNITS_PER_WORD
;
3496 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3499 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3503 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3504 We need to emit DTP-relative relocations. */
3507 ia64_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
3511 fputs ("\tdata8.ua\t@dtprel(", file
);
3512 output_addr_const (file
, x
);
3516 /* Print a memory address as an operand to reference that memory location. */
3518 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3519 also call this from ia64_print_operand for memory addresses. */
3522 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED
,
3523 rtx address ATTRIBUTE_UNUSED
)
/* NOTE(review): the text below is extraction-garbled — original source lines
   are split across physical lines and interleaved with the original file's
   line numbers, and many lines (case labels, braces) are missing.  It appears
   to be the operand-printing dispatcher of the GCC IA-64 back end; restore it
   from the repository history rather than editing in place.  */
3527 /* Print an operand to an assembler instruction.
3528 C Swap and print a comparison operator.
3529 D Print an FP comparison operator.
3530 E Print 32 - constant, for SImode shifts as extract.
3531 e Print 64 - constant, for DImode rotates.
3532 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3533 a floating point register emitted normally.
3534 I Invert a predicate register by adding 1.
3535 J Select the proper predicate register for a condition.
3536 j Select the inverse predicate register for a condition.
3537 O Append .acq for volatile load.
3538 P Postincrement of a MEM.
3539 Q Append .rel for volatile store.
3540 S Shift amount for shladd instruction.
3541 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3542 for Intel assembler.
3543 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3544 for Intel assembler.
3545 r Print register name, or constant 0 as r0. HP compatibility for
3548 ia64_print_operand (FILE * file
, rtx x
, int code
)
3555 /* Handled below. */
3560 enum rtx_code c
= swap_condition (GET_CODE (x
));
3561 fputs (GET_RTX_NAME (c
), file
);
3566 switch (GET_CODE (x
))
3578 str
= GET_RTX_NAME (GET_CODE (x
));
3585 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3589 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3593 if (x
== CONST0_RTX (GET_MODE (x
)))
3594 str
= reg_names
[FR_REG (0)];
3595 else if (x
== CONST1_RTX (GET_MODE (x
)))
3596 str
= reg_names
[FR_REG (1)];
3597 else if (GET_CODE (x
) == REG
)
3598 str
= reg_names
[REGNO (x
)];
3605 fputs (reg_names
[REGNO (x
) + 1], file
);
3611 unsigned int regno
= REGNO (XEXP (x
, 0));
3612 if (GET_CODE (x
) == EQ
)
3616 fputs (reg_names
[regno
], file
);
3621 if (MEM_VOLATILE_P (x
))
3622 fputs(".acq", file
);
3627 HOST_WIDE_INT value
;
3629 switch (GET_CODE (XEXP (x
, 0)))
3635 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3636 if (GET_CODE (x
) == CONST_INT
)
3638 else if (GET_CODE (x
) == REG
)
3640 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3648 value
= GET_MODE_SIZE (GET_MODE (x
));
3652 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3656 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
3661 if (MEM_VOLATILE_P (x
))
3662 fputs(".rel", file
);
3666 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3670 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3672 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3678 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3680 const char *prefix
= "0x";
3681 if (INTVAL (x
) & 0x80000000)
3683 fprintf (file
, "0xffffffff");
3686 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3692 /* If this operand is the constant zero, write it as register zero.
3693 Any register, zero, or CONST_INT value is OK here. */
3694 if (GET_CODE (x
) == REG
)
3695 fputs (reg_names
[REGNO (x
)], file
);
3696 else if (x
== CONST0_RTX (GET_MODE (x
)))
3698 else if (GET_CODE (x
) == CONST_INT
)
3699 output_addr_const (file
, x
);
3701 output_operand_lossage ("invalid %%r value");
3708 /* For conditional branches, returns or calls, substitute
3709 sptk, dptk, dpnt, or spnt for %s. */
3710 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
3713 int pred_val
= INTVAL (XEXP (x
, 0));
3715 /* Guess top and bottom 10% statically predicted. */
3716 if (pred_val
< REG_BR_PROB_BASE
/ 50)
3718 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
3720 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
3725 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
3730 fputs (which
, file
);
3735 x
= current_insn_predicate
;
3738 unsigned int regno
= REGNO (XEXP (x
, 0));
3739 if (GET_CODE (x
) == EQ
)
3741 fprintf (file
, "(%s) ", reg_names
[regno
]);
3746 output_operand_lossage ("ia64_print_operand: unknown code");
3750 switch (GET_CODE (x
))
3752 /* This happens for the spill/restore instructions. */
3757 /* ... fall through ... */
3760 fputs (reg_names
[REGNO (x
)], file
);
3765 rtx addr
= XEXP (x
, 0);
3766 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
3767 addr
= XEXP (addr
, 0);
3768 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
3773 output_addr_const (file
, x
);
/* NOTE(review): extraction-garbled fragment of the RTX cost hook; the switch
   skeleton (case labels, return statements) is missing.  Restore from the
   repository history rather than editing in place.  */
3780 /* Compute a (partial) cost for rtx X. Return true if the complete
3781 cost has been computed, and false if subexpressions should be
3782 scanned. In either case, *TOTAL contains the cost result. */
3783 /* ??? This is incomplete. */
3786 ia64_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
3794 *total
= CONST_OK_FOR_J (INTVAL (x
)) ? 0 : COSTS_N_INSNS (1);
3797 if (CONST_OK_FOR_I (INTVAL (x
)))
3799 else if (CONST_OK_FOR_J (INTVAL (x
)))
3802 *total
= COSTS_N_INSNS (1);
3805 if (CONST_OK_FOR_K (INTVAL (x
)) || CONST_OK_FOR_L (INTVAL (x
)))
3808 *total
= COSTS_N_INSNS (1);
3813 *total
= COSTS_N_INSNS (1);
3819 *total
= COSTS_N_INSNS (3);
3823 /* For multiplies wider than HImode, we have to go to the FPU,
3824 which normally involves copies. Plus there's the latency
3825 of the multiply itself, and the latency of the instructions to
3826 transfer integer regs to FP regs. */
3827 /* ??? Check for FP mode. */
3828 if (GET_MODE_SIZE (GET_MODE (x
)) > 2)
3829 *total
= COSTS_N_INSNS (10);
3831 *total
= COSTS_N_INSNS (2);
3839 *total
= COSTS_N_INSNS (1);
3846 /* We make divide expensive, so that divide-by-constant will be
3847 optimized to a multiply. */
3848 *total
= COSTS_N_INSNS (60);
/* NOTE(review): extraction-garbled fragment of the register move cost hook;
   the switch/case skeleton and return values are missing.  Restore from the
   repository history rather than editing in place.  */
3856 /* Calculate the cost of moving data from a register in class FROM to
3857 one in class TO, using MODE. */
3860 ia64_register_move_cost (enum machine_mode mode
, enum reg_class from
,
3863 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
3864 if (to
== ADDL_REGS
)
3866 if (from
== ADDL_REGS
)
3869 /* All costs are symmetric, so reduce cases by putting the
3870 lower number class as the destination. */
3873 enum reg_class tmp
= to
;
3874 to
= from
, from
= tmp
;
3877 /* Moving from FR<->GR in XFmode must be more expensive than 2,
3878 so that we get secondary memory reloads. Between FR_REGS,
3879 we have to make this at least as expensive as MEMORY_MOVE_COST
3880 to avoid spectacularly poor register class preferencing. */
3883 if (to
!= GR_REGS
|| from
!= GR_REGS
)
3884 return MEMORY_MOVE_COST (mode
, to
, 0);
3892 /* Moving between PR registers takes two insns. */
3893 if (from
== PR_REGS
)
3895 /* Moving between PR and anything but GR is impossible. */
3896 if (from
!= GR_REGS
)
3897 return MEMORY_MOVE_COST (mode
, to
, 0);
3901 /* Moving between BR and anything but GR is impossible. */
3902 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
3903 return MEMORY_MOVE_COST (mode
, to
, 0);
3908 /* Moving between AR and anything but GR is impossible. */
3909 if (from
!= GR_REGS
)
3910 return MEMORY_MOVE_COST (mode
, to
, 0);
3915 case GR_AND_FR_REGS
:
3916 case GR_AND_BR_REGS
:
/* NOTE(review): extraction-garbled fragment of the secondary reload class
   hook; case labels, the returned register classes, and the final return are
   missing.  Restore from the repository history rather than editing in
   place.  */
3927 /* This function returns the register class required for a secondary
3928 register when copying between one of the registers in CLASS, and X,
3929 using MODE. A return value of NO_REGS means that no secondary register
3933 ia64_secondary_reload_class (enum reg_class
class,
3934 enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
3938 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
3939 regno
= true_regnum (x
);
3946 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
3947 interaction. We end up with two pseudos with overlapping lifetimes
3948 both of which are equiv to the same constant, and both which need
3949 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
3950 changes depending on the path length, which means the qty_first_reg
3951 check in make_regs_eqv can give different answers at different times.
3952 At some point I'll probably need a reload_indi pattern to handle
3955 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
3956 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
3957 non-general registers for good measure. */
3958 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
3961 /* This is needed if a pseudo used as a call_operand gets spilled to a
3963 if (GET_CODE (x
) == MEM
)
3968 /* Need to go through general registers to get to other class regs. */
3969 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
3972 /* This can happen when a paradoxical subreg is an operand to the
3974 /* ??? This shouldn't be necessary after instruction scheduling is
3975 enabled, because paradoxical subregs are not accepted by
3976 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3977 stop the paradoxical subreg stupidity in the *_operand functions
3979 if (GET_CODE (x
) == MEM
3980 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
3981 || GET_MODE (x
) == QImode
))
3984 /* This can happen because of the ior/and/etc patterns that accept FP
3985 registers as operands. If the third operand is a constant, then it
3986 needs to be reloaded into a FP register. */
3987 if (GET_CODE (x
) == CONST_INT
)
3990 /* This can happen because of register elimination in a muldi3 insn.
3991 E.g. `26107 * (unsigned long)&u'. */
3992 if (GET_CODE (x
) == PLUS
)
3997 /* ??? This happens if we cse/gcse a BImode value across a call,
3998 and the function has a nonlocal goto. This is because global
3999 does not allocate call crossing pseudos to hard registers when
4000 current_function_has_nonlocal_goto is true. This is relatively
4001 common for C++ programs that use exceptions. To reproduce,
4002 return NO_REGS and compile libstdc++. */
4003 if (GET_CODE (x
) == MEM
)
4006 /* This can happen when we take a BImode subreg of a DImode value,
4007 and that DImode value winds up in some non-GR register. */
4008 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
/* NOTE(review): extraction-garbled fragment; the surrounding conditional
   structure (the TARGET_GNU_AS / HP-UX guards and early returns) is missing.
   Restore from the repository history rather than editing in place.  */
4020 /* Emit text to declare externally defined variables and functions, because
4021 the Intel assembler does not support undefined externals. */
4024 ia64_asm_output_external (FILE *file
, tree decl
, const char *name
)
4026 int save_referenced
;
4028 /* GNU as does not need anything here, but the HP linker does need
4029 something for external functions. */
4033 || TREE_CODE (decl
) != FUNCTION_DECL
4034 || strstr (name
, "__builtin_") == name
))
4037 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4038 the linker when we do this, so we need to be careful not to do this for
4039 builtin functions which have no library equivalent. Unfortunately, we
4040 can't tell here whether or not a function will actually be called by
4041 expand_expr, so we pull in library functions even if we may not need
4043 if (! strcmp (name
, "__builtin_next_arg")
4044 || ! strcmp (name
, "alloca")
4045 || ! strcmp (name
, "__builtin_constant_p")
4046 || ! strcmp (name
, "__builtin_args_info"))
4050 ia64_hpux_add_extern_decl (decl
);
4053 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4055 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
4056 if (TREE_CODE (decl
) == FUNCTION_DECL
)
4057 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
4058 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4059 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
4063 /* Parse the -mfixed-range= option string. */
4066 fix_range (const char *const_str
)
4069 char *str
, *dash
, *comma
;
4071 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4072 REG2 are either register names or register numbers. The effect
4073 of this option is to mark the registers in the range from REG1 to
4074 REG2 as ``fixed'' so they won't be used by the compiler. This is
4075 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4077 i
= strlen (const_str
);
4078 str
= (char *) alloca (i
+ 1);
4079 memcpy (str
, const_str
, i
+ 1);
4083 dash
= strchr (str
, '-');
4086 warning ("value of -mfixed-range must have form REG1-REG2");
4091 comma
= strchr (dash
+ 1, ',');
4095 first
= decode_reg_name (str
);
4098 warning ("unknown register name: %s", str
);
4102 last
= decode_reg_name (dash
+ 1);
4105 warning ("unknown register name: %s", dash
+ 1);
4113 warning ("%s-%s is an empty range", str
, dash
+ 1);
4117 for (i
= first
; i
<= last
; ++i
)
4118 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4128 static struct machine_function
*
4129 ia64_init_machine_status (void)
4131 return ggc_alloc_cleared (sizeof (struct machine_function
));
4134 /* Handle TARGET_OPTIONS switches. */
4137 ia64_override_options (void)
4141 const char *const name
; /* processor name or nickname. */
4142 const enum processor_type processor
;
4144 const processor_alias_table
[] =
4146 {"itanium", PROCESSOR_ITANIUM
},
4147 {"itanium1", PROCESSOR_ITANIUM
},
4148 {"merced", PROCESSOR_ITANIUM
},
4149 {"itanium2", PROCESSOR_ITANIUM2
},
4150 {"mckinley", PROCESSOR_ITANIUM2
},
4153 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
4156 if (TARGET_AUTO_PIC
)
4157 target_flags
|= MASK_CONST_GP
;
4159 if (TARGET_INLINE_FLOAT_DIV_LAT
&& TARGET_INLINE_FLOAT_DIV_THR
)
4161 if ((target_flags_explicit
& MASK_INLINE_FLOAT_DIV_LAT
)
4162 && (target_flags_explicit
& MASK_INLINE_FLOAT_DIV_THR
))
4164 warning ("cannot optimize floating point division for both latency and throughput");
4165 target_flags
&= ~MASK_INLINE_FLOAT_DIV_THR
;
4169 if (target_flags_explicit
& MASK_INLINE_FLOAT_DIV_THR
)
4170 target_flags
&= ~MASK_INLINE_FLOAT_DIV_LAT
;
4172 target_flags
&= ~MASK_INLINE_FLOAT_DIV_THR
;
4176 if (TARGET_INLINE_INT_DIV_LAT
&& TARGET_INLINE_INT_DIV_THR
)
4178 if ((target_flags_explicit
& MASK_INLINE_INT_DIV_LAT
)
4179 && (target_flags_explicit
& MASK_INLINE_INT_DIV_THR
))
4181 warning ("cannot optimize integer division for both latency and throughput");
4182 target_flags
&= ~MASK_INLINE_INT_DIV_THR
;
4186 if (target_flags_explicit
& MASK_INLINE_INT_DIV_THR
)
4187 target_flags
&= ~MASK_INLINE_INT_DIV_LAT
;
4189 target_flags
&= ~MASK_INLINE_INT_DIV_THR
;
4193 if (TARGET_INLINE_SQRT_LAT
&& TARGET_INLINE_SQRT_THR
)
4195 if ((target_flags_explicit
& MASK_INLINE_SQRT_LAT
)
4196 && (target_flags_explicit
& MASK_INLINE_SQRT_THR
))
4198 warning ("cannot optimize square root for both latency and throughput");
4199 target_flags
&= ~MASK_INLINE_SQRT_THR
;
4203 if (target_flags_explicit
& MASK_INLINE_SQRT_THR
)
4204 target_flags
&= ~MASK_INLINE_SQRT_LAT
;
4206 target_flags
&= ~MASK_INLINE_SQRT_THR
;
4210 if (TARGET_INLINE_SQRT_LAT
)
4212 warning ("not yet implemented: latency-optimized inline square root");
4213 target_flags
&= ~MASK_INLINE_SQRT_LAT
;
4216 if (ia64_fixed_range_string
)
4217 fix_range (ia64_fixed_range_string
);
4219 if (ia64_tls_size_string
)
4222 unsigned long tmp
= strtoul (ia64_tls_size_string
, &end
, 10);
4223 if (*end
|| (tmp
!= 14 && tmp
!= 22 && tmp
!= 64))
4224 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string
);
4226 ia64_tls_size
= tmp
;
4229 if (!ia64_tune_string
)
4230 ia64_tune_string
= "itanium2";
4232 for (i
= 0; i
< pta_size
; i
++)
4233 if (! strcmp (ia64_tune_string
, processor_alias_table
[i
].name
))
4235 ia64_tune
= processor_alias_table
[i
].processor
;
4240 error ("bad value (%s) for -tune= switch", ia64_tune_string
);
4242 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
4243 flag_schedule_insns_after_reload
= 0;
4245 /* Variable tracking should be run after all optimizations which change order
4246 of insns. It also needs a valid CFG. */
4247 ia64_flag_var_tracking
= flag_var_tracking
;
4248 flag_var_tracking
= 0;
4250 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
4252 init_machine_status
= ia64_init_machine_status
;
4255 static enum attr_itanium_class
ia64_safe_itanium_class (rtx
);
4256 static enum attr_type
ia64_safe_type (rtx
);
4258 static enum attr_itanium_class
4259 ia64_safe_itanium_class (rtx insn
)
4261 if (recog_memoized (insn
) >= 0)
4262 return get_attr_itanium_class (insn
);
4264 return ITANIUM_CLASS_UNKNOWN
;
4267 static enum attr_type
4268 ia64_safe_type (rtx insn
)
4270 if (recog_memoized (insn
) >= 0)
4271 return get_attr_type (insn
);
4273 return TYPE_UNKNOWN
;
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
4322 /* Cumulative info for the current instruction group. */
4323 struct reg_write_state rws_sum
[NUM_REGS
];
4324 /* Info for the current instruction. This gets copied to rws_sum after a
4325 stop bit is emitted. */
4326 struct reg_write_state rws_insn
[NUM_REGS
];
4328 /* Indicates whether this is the first instruction after a stop bit,
4329 in which case we don't need another stop bit. Without this, we hit
4330 the abort in ia64_variable_issue when scheduling an alloc. */
4331 static int first_instruction
;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};
4345 static void rws_update (struct reg_write_state
*, int, struct reg_flags
, int);
4346 static int rws_access_regno (int, struct reg_flags
, int);
4347 static int rws_access_reg (rtx
, struct reg_flags
, int);
4348 static void update_set_flags (rtx
, struct reg_flags
*, int *, rtx
*);
4349 static int set_src_needs_barrier (rtx
, struct reg_flags
, int, rtx
);
4350 static int rtx_needs_barrier (rtx
, struct reg_flags
, int);
4351 static void init_insn_group_barriers (void);
4352 static int group_barrier_needed_p (rtx
);
4353 static int safe_group_barrier_needed_p (rtx
);
4355 /* Update *RWS for REGNO, which is being written by the current instruction,
4356 with predicate PRED, and associated register flags in FLAGS. */
4359 rws_update (struct reg_write_state
*rws
, int regno
, struct reg_flags flags
, int pred
)
4362 rws
[regno
].write_count
++;
4364 rws
[regno
].write_count
= 2;
4365 rws
[regno
].written_by_fp
|= flags
.is_fp
;
4366 /* ??? Not tracking and/or across differing predicates. */
4367 rws
[regno
].written_by_and
= flags
.is_and
;
4368 rws
[regno
].written_by_or
= flags
.is_or
;
4369 rws
[regno
].first_pred
= pred
;
4372 /* Handle an access to register REGNO of type FLAGS using predicate register
4373 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4374 a dependency with an earlier instruction in the same group. */
4377 rws_access_regno (int regno
, struct reg_flags flags
, int pred
)
4379 int need_barrier
= 0;
4381 if (regno
>= NUM_REGS
)
4384 if (! PR_REGNO_P (regno
))
4385 flags
.is_and
= flags
.is_or
= 0;
4391 /* One insn writes same reg multiple times? */
4392 if (rws_insn
[regno
].write_count
> 0)
4395 /* Update info for current instruction. */
4396 rws_update (rws_insn
, regno
, flags
, pred
);
4397 write_count
= rws_sum
[regno
].write_count
;
4399 switch (write_count
)
4402 /* The register has not been written yet. */
4403 rws_update (rws_sum
, regno
, flags
, pred
);
4407 /* The register has been written via a predicate. If this is
4408 not a complementary predicate, then we need a barrier. */
4409 /* ??? This assumes that P and P+1 are always complementary
4410 predicates for P even. */
4411 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4413 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4415 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4417 rws_update (rws_sum
, regno
, flags
, pred
);
4421 /* The register has been unconditionally written already. We
4423 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4425 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4429 rws_sum
[regno
].written_by_and
= flags
.is_and
;
4430 rws_sum
[regno
].written_by_or
= flags
.is_or
;
4439 if (flags
.is_branch
)
4441 /* Branches have several RAW exceptions that allow to avoid
4444 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
4445 /* RAW dependencies on branch regs are permissible as long
4446 as the writer is a non-branch instruction. Since we
4447 never generate code that uses a branch register written
4448 by a branch instruction, handling this case is
4452 if (REGNO_REG_CLASS (regno
) == PR_REGS
4453 && ! rws_sum
[regno
].written_by_fp
)
4454 /* The predicates of a branch are available within the
4455 same insn group as long as the predicate was written by
4456 something other than a floating-point instruction. */
4460 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4462 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4465 switch (rws_sum
[regno
].write_count
)
4468 /* The register has not been written yet. */
4472 /* The register has been written via a predicate. If this is
4473 not a complementary predicate, then we need a barrier. */
4474 /* ??? This assumes that P and P+1 are always complementary
4475 predicates for P even. */
4476 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4481 /* The register has been unconditionally written already. We
4491 return need_barrier
;
4495 rws_access_reg (rtx reg
, struct reg_flags flags
, int pred
)
4497 int regno
= REGNO (reg
);
4498 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4501 return rws_access_regno (regno
, flags
, pred
);
4504 int need_barrier
= 0;
4506 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4507 return need_barrier
;
/* NOTE(review): extraction-garbled fragment; case labels, the abort() after
   the predicate-register check, and the *pcond/*ppred assignments that close
   the branches are missing.  Restore from the repository history rather than
   editing in place.  */
4511 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4512 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4515 update_set_flags (rtx x
, struct reg_flags
*pflags
, int *ppred
, rtx
*pcond
)
4517 rtx src
= SET_SRC (x
);
4521 switch (GET_CODE (src
))
4527 if (SET_DEST (x
) == pc_rtx
)
4528 /* X is a conditional branch. */
4532 int is_complemented
= 0;
4534 /* X is a conditional move. */
4535 rtx cond
= XEXP (src
, 0);
4536 if (GET_CODE (cond
) == EQ
)
4537 is_complemented
= 1;
4538 cond
= XEXP (cond
, 0);
4539 if (GET_CODE (cond
) != REG
4540 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4543 if (XEXP (src
, 1) == SET_DEST (x
)
4544 || XEXP (src
, 2) == SET_DEST (x
))
4546 /* X is a conditional move that conditionally writes the
4549 /* We need another complement in this case. */
4550 if (XEXP (src
, 1) == SET_DEST (x
))
4551 is_complemented
= ! is_complemented
;
4553 *ppred
= REGNO (cond
);
4554 if (is_complemented
)
4558 /* ??? If this is a conditional write to the dest, then this
4559 instruction does not actually read one source. This probably
4560 doesn't matter, because that source is also the dest. */
4561 /* ??? Multiple writes to predicate registers are allowed
4562 if they are all AND type compares, or if they are all OR
4563 type compares. We do not generate such instructions
4566 /* ... fall through ... */
4569 if (COMPARISON_P (src
)
4570 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4571 /* Set pflags->is_fp to 1 so that we know we're dealing
4572 with a floating point comparison when processing the
4573 destination of the SET. */
4576 /* Discover if this is a parallel comparison. We only handle
4577 and.orcm and or.andcm at present, since we must retain a
4578 strict inverse on the predicate pair. */
4579 else if (GET_CODE (src
) == AND
)
4581 else if (GET_CODE (src
) == IOR
)
4588 /* Subroutine of rtx_needs_barrier; this function determines whether the
4589 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4590 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4594 set_src_needs_barrier (rtx x
, struct reg_flags flags
, int pred
, rtx cond
)
4596 int need_barrier
= 0;
4598 rtx src
= SET_SRC (x
);
4600 if (GET_CODE (src
) == CALL
)
4601 /* We don't need to worry about the result registers that
4602 get written by subroutine call. */
4603 return rtx_needs_barrier (src
, flags
, pred
);
4604 else if (SET_DEST (x
) == pc_rtx
)
4606 /* X is a conditional branch. */
4607 /* ??? This seems redundant, as the caller sets this bit for
4609 flags
.is_branch
= 1;
4610 return rtx_needs_barrier (src
, flags
, pred
);
4613 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4615 /* This instruction unconditionally uses a predicate register. */
4617 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4620 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4622 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4623 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4624 dst
= XEXP (dst
, 0);
4626 return need_barrier
;
4629 /* Handle an access to rtx X of type FLAGS using predicate register
4630 PRED. Return 1 if this access creates a dependency with an earlier
4631 instruction in the same group. */
4634 rtx_needs_barrier (rtx x
, struct reg_flags flags
, int pred
)
4637 int is_complemented
= 0;
4638 int need_barrier
= 0;
4639 const char *format_ptr
;
4640 struct reg_flags new_flags
;
4648 switch (GET_CODE (x
))
4651 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4652 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4653 if (GET_CODE (SET_SRC (x
)) != CALL
)
4655 new_flags
.is_write
= 1;
4656 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4661 new_flags
.is_write
= 0;
4662 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4664 /* Avoid multiple register writes, in case this is a pattern with
4665 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4666 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4668 new_flags
.is_write
= 1;
4669 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4670 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4671 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4676 /* X is a predicated instruction. */
4678 cond
= COND_EXEC_TEST (x
);
4681 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4683 if (GET_CODE (cond
) == EQ
)
4684 is_complemented
= 1;
4685 cond
= XEXP (cond
, 0);
4686 if (GET_CODE (cond
) != REG
4687 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4689 pred
= REGNO (cond
);
4690 if (is_complemented
)
4693 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4694 return need_barrier
;
4698 /* Clobber & use are for earlier compiler-phases only. */
4703 /* We always emit stop bits for traditional asms. We emit stop bits
4704 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4705 if (GET_CODE (x
) != ASM_OPERANDS
4706 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4708 /* Avoid writing the register multiple times if we have multiple
4709 asm outputs. This avoids an abort in rws_access_reg. */
4710 if (! rws_insn
[REG_VOLATILE
].write_count
)
4712 new_flags
.is_write
= 1;
4713 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
4718 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4719 We cannot just fall through here since then we would be confused
4720 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4721 traditional asms unlike their normal usage. */
4723 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
4724 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
4729 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4731 rtx pat
= XVECEXP (x
, 0, i
);
4732 if (GET_CODE (pat
) == SET
)
4734 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
4735 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
4737 else if (GET_CODE (pat
) == USE
4738 || GET_CODE (pat
) == CALL
4739 || GET_CODE (pat
) == ASM_OPERANDS
)
4740 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4741 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
4744 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
4746 rtx pat
= XVECEXP (x
, 0, i
);
4747 if (GET_CODE (pat
) == SET
)
4749 if (GET_CODE (SET_SRC (pat
)) != CALL
)
4751 new_flags
.is_write
= 1;
4752 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
4756 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
4757 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
4765 if (REGNO (x
) == AR_UNAT_REGNUM
)
4767 for (i
= 0; i
< 64; ++i
)
4768 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
4771 need_barrier
= rws_access_reg (x
, flags
, pred
);
4775 /* Find the regs used in memory address computation. */
4776 new_flags
.is_write
= 0;
4777 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4780 case CONST_INT
: case CONST_DOUBLE
:
4781 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
4784 /* Operators with side-effects. */
4785 case POST_INC
: case POST_DEC
:
4786 if (GET_CODE (XEXP (x
, 0)) != REG
)
4789 new_flags
.is_write
= 0;
4790 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4791 new_flags
.is_write
= 1;
4792 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4796 if (GET_CODE (XEXP (x
, 0)) != REG
)
4799 new_flags
.is_write
= 0;
4800 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4801 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4802 new_flags
.is_write
= 1;
4803 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
4806 /* Handle common unary and binary ops for efficiency. */
4807 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
4808 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
4809 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
4810 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
4811 case NE
: case EQ
: case GE
: case GT
: case LE
:
4812 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
4813 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
4814 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
4817 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
4818 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
4819 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
4820 case SQRT
: case FFS
: case POPCOUNT
:
4821 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
4825 switch (XINT (x
, 1))
4827 case UNSPEC_LTOFF_DTPMOD
:
4828 case UNSPEC_LTOFF_DTPREL
:
4830 case UNSPEC_LTOFF_TPREL
:
4832 case UNSPEC_PRED_REL_MUTEX
:
4833 case UNSPEC_PIC_CALL
:
4835 case UNSPEC_FETCHADD_ACQ
:
4836 case UNSPEC_BSP_VALUE
:
4837 case UNSPEC_FLUSHRS
:
4838 case UNSPEC_BUNDLE_SELECTOR
:
4841 case UNSPEC_GR_SPILL
:
4842 case UNSPEC_GR_RESTORE
:
4844 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
4845 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
4847 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4848 new_flags
.is_write
= (XINT (x
, 1) == 1);
4849 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
4854 case UNSPEC_FR_SPILL
:
4855 case UNSPEC_FR_RESTORE
:
4856 case UNSPEC_GETF_EXP
:
4857 case UNSPEC_SETF_EXP
:
4859 case UNSPEC_FR_SQRT_RECIP_APPROX
:
4860 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4863 case UNSPEC_FR_RECIP_APPROX
:
4864 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
4865 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4868 case UNSPEC_CMPXCHG_ACQ
:
4869 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
4870 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
4878 case UNSPEC_VOLATILE
:
4879 switch (XINT (x
, 1))
4882 /* Alloc must always be the first instruction of a group.
4883 We force this by always returning true. */
4884 /* ??? We might get better scheduling if we explicitly check for
4885 input/local/output register dependencies, and modify the
4886 scheduler so that alloc is always reordered to the start of
4887 the current group. We could then eliminate all of the
4888 first_instruction code. */
4889 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4891 new_flags
.is_write
= 1;
4892 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4895 case UNSPECV_SET_BSP
:
4899 case UNSPECV_BLOCKAGE
:
4900 case UNSPECV_INSN_GROUP_BARRIER
:
4902 case UNSPECV_PSAC_ALL
:
4903 case UNSPECV_PSAC_NORMAL
:
4912 new_flags
.is_write
= 0;
4913 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
4914 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
4916 new_flags
.is_write
= 1;
4917 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4918 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4922 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
4923 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4924 switch (format_ptr
[i
])
4926 case '0': /* unused field */
4927 case 'i': /* integer */
4928 case 'n': /* note */
4929 case 'w': /* wide integer */
4930 case 's': /* pointer to string */
4931 case 'S': /* optional pointer to string */
4935 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
4940 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
4941 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
4950 return need_barrier
;
4953 /* Clear out the state for group_barrier_needed_p at the start of a
4954 sequence of insns. */
4957 init_insn_group_barriers (void)
4959 memset (rws_sum
, 0, sizeof (rws_sum
));
4960 first_instruction
= 1;
4963 /* Given the current state, recorded by previous calls to this function,
4964 determine whether a group barrier (a stop bit) is necessary before INSN.
4965 Return nonzero if so. */
4968 group_barrier_needed_p (rtx insn
)
4971 int need_barrier
= 0;
4972 struct reg_flags flags
;
4974 memset (&flags
, 0, sizeof (flags
));
4975 switch (GET_CODE (insn
))
4981 /* A barrier doesn't imply an instruction group boundary. */
4985 memset (rws_insn
, 0, sizeof (rws_insn
));
4989 flags
.is_branch
= 1;
4990 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
4991 memset (rws_insn
, 0, sizeof (rws_insn
));
4993 /* Don't bundle a call following another call. */
4994 if ((pat
= prev_active_insn (insn
))
4995 && GET_CODE (pat
) == CALL_INSN
)
5001 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
5005 flags
.is_branch
= 1;
5007 /* Don't bundle a jump following a call. */
5008 if ((pat
= prev_active_insn (insn
))
5009 && GET_CODE (pat
) == CALL_INSN
)
5017 if (GET_CODE (PATTERN (insn
)) == USE
5018 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
5019 /* Don't care about USE and CLOBBER "insns"---those are used to
5020 indicate to the optimizer that it shouldn't get rid of
5021 certain operations. */
5024 pat
= PATTERN (insn
);
5026 /* Ug. Hack hacks hacked elsewhere. */
5027 switch (recog_memoized (insn
))
5029 /* We play dependency tricks with the epilogue in order
5030 to get proper schedules. Undo this for dv analysis. */
5031 case CODE_FOR_epilogue_deallocate_stack
:
5032 case CODE_FOR_prologue_allocate_stack
:
5033 pat
= XVECEXP (pat
, 0, 0);
5036 /* The pattern we use for br.cloop confuses the code above.
5037 The second element of the vector is representative. */
5038 case CODE_FOR_doloop_end_internal
:
5039 pat
= XVECEXP (pat
, 0, 1);
5042 /* Doesn't generate code. */
5043 case CODE_FOR_pred_rel_mutex
:
5044 case CODE_FOR_prologue_use
:
5051 memset (rws_insn
, 0, sizeof (rws_insn
));
5052 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
5054 /* Check to see if the previous instruction was a volatile
5057 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
5064 if (first_instruction
&& INSN_P (insn
)
5065 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
5066 && GET_CODE (PATTERN (insn
)) != USE
5067 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
5070 first_instruction
= 0;
5073 return need_barrier
;
5076 /* Like group_barrier_needed_p, but do not clobber the current state. */
5079 safe_group_barrier_needed_p (rtx insn
)
5081 struct reg_write_state rws_saved
[NUM_REGS
];
5082 int saved_first_instruction
;
5085 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
5086 saved_first_instruction
= first_instruction
;
5088 t
= group_barrier_needed_p (insn
);
5090 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
5091 first_instruction
= saved_first_instruction
;
5096 /* Scan the current function and insert stop bits as necessary to
5097 eliminate dependencies. This function assumes that a final
5098 instruction scheduling pass has been run which has already
5099 inserted most of the necessary stop bits. This function only
5100 inserts new ones at basic block boundaries, since these are
5101 invisible to the scheduler. */
5104 emit_insn_group_barriers (FILE *dump
)
5108 int insns_since_last_label
= 0;
5110 init_insn_group_barriers ();
5112 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5114 if (GET_CODE (insn
) == CODE_LABEL
)
5116 if (insns_since_last_label
)
5118 insns_since_last_label
= 0;
5120 else if (GET_CODE (insn
) == NOTE
5121 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
5123 if (insns_since_last_label
)
5125 insns_since_last_label
= 0;
5127 else if (GET_CODE (insn
) == INSN
5128 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
5129 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
5131 init_insn_group_barriers ();
5134 else if (INSN_P (insn
))
5136 insns_since_last_label
= 1;
5138 if (group_barrier_needed_p (insn
))
5143 fprintf (dump
, "Emitting stop before label %d\n",
5144 INSN_UID (last_label
));
5145 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
5148 init_insn_group_barriers ();
5156 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5157 This function has to emit all necessary group barriers. */
5160 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
5164 init_insn_group_barriers ();
5166 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5168 if (GET_CODE (insn
) == BARRIER
)
5170 rtx last
= prev_active_insn (insn
);
5174 if (GET_CODE (last
) == JUMP_INSN
5175 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
5176 last
= prev_active_insn (last
);
5177 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
5178 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
5180 init_insn_group_barriers ();
5182 else if (INSN_P (insn
))
5184 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
5185 init_insn_group_barriers ();
5186 else if (group_barrier_needed_p (insn
))
5188 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5189 init_insn_group_barriers ();
5190 group_barrier_needed_p (insn
);
5197 static int errata_find_address_regs (rtx
*, void *);
5198 static void errata_emit_nops (rtx
);
5199 static void fixup_errata (void);
5201 /* This structure is used to track some details about the previous insns
5202 groups so we can determine if it may be necessary to insert NOPs to
5203 workaround hardware errata. */
5206 HARD_REG_SET p_reg_set
;
5207 HARD_REG_SET gr_reg_conditionally_set
;
5210 /* Index into the last_group array. */
5211 static int group_idx
;
5213 /* Called through for_each_rtx; determines if a hard register that was
5214 conditionally set in the previous group is used as an address register.
5215 It ensures that for_each_rtx returns 1 in that case. */
5217 errata_find_address_regs (rtx
*xp
, void *data ATTRIBUTE_UNUSED
)
5220 if (GET_CODE (x
) != MEM
)
5223 if (GET_CODE (x
) == POST_MODIFY
)
5225 if (GET_CODE (x
) == REG
)
5227 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5228 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
5236 /* Called for each insn; this function keeps track of the state in
5237 last_group and emits additional NOPs if necessary to work around
5238 an Itanium A/B step erratum. */
5240 errata_emit_nops (rtx insn
)
5242 struct group
*this_group
= last_group
+ group_idx
;
5243 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5244 rtx pat
= PATTERN (insn
);
5245 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5246 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5247 enum attr_type type
;
5250 if (GET_CODE (real_pat
) == USE
5251 || GET_CODE (real_pat
) == CLOBBER
5252 || GET_CODE (real_pat
) == ASM_INPUT
5253 || GET_CODE (real_pat
) == ADDR_VEC
5254 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5255 || asm_noperands (PATTERN (insn
)) >= 0)
5258 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5261 if (GET_CODE (set
) == PARALLEL
)
5264 set
= XVECEXP (real_pat
, 0, 0);
5265 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5266 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5267 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5274 if (set
&& GET_CODE (set
) != SET
)
5277 type
= get_attr_type (insn
);
5280 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5281 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5283 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5284 && REG_P (SET_DEST (set
))
5285 && GET_CODE (SET_SRC (set
)) != PLUS
5286 && GET_CODE (SET_SRC (set
)) != MINUS
5287 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5288 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5289 && (GET_CODE (SET_SRC (set
)) != MEM
5290 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5291 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5293 if (!COMPARISON_P (cond
)
5294 || !REG_P (XEXP (cond
, 0)))
5297 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5298 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5300 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5302 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5303 emit_insn_before (gen_nop (), insn
);
5304 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5306 memset (last_group
, 0, sizeof last_group
);
5310 /* Emit extra nops if they are required to work around hardware errata. */
5317 if (! TARGET_B_STEP
)
5321 memset (last_group
, 0, sizeof last_group
);
5323 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5328 if (ia64_safe_type (insn
) == TYPE_S
)
5331 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5334 errata_emit_nops (insn
);
5339 /* Instruction scheduling support. */
5341 #define NR_BUNDLES 10
5343 /* A list of names of all available bundles. */
5345 static const char *bundle_name
[NR_BUNDLES
] =
5351 #if NR_BUNDLES == 10
5361 /* Nonzero if we should insert stop bits into the schedule. */
5363 int ia64_final_schedule
= 0;
5365 /* Codes of the corresponding quieryied units: */
5367 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
5368 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
5370 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
5371 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
5373 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
5375 /* The following variable value is an insn group barrier. */
5377 static rtx dfa_stop_insn
;
5379 /* The following variable value is the last issued insn. */
5381 static rtx last_scheduled_insn
;
5383 /* The following variable value is size of the DFA state. */
5385 static size_t dfa_state_size
;
5387 /* The following variable value is pointer to a DFA state used as
5388 temporary variable. */
5390 static state_t temp_dfa_state
= NULL
;
5392 /* The following variable value is DFA state after issuing the last
5395 static state_t prev_cycle_state
= NULL
;
5397 /* The following array element values are TRUE if the corresponding
5398 insn requires to add stop bits before it. */
5400 static char *stops_p
;
5402 /* The following variable is used to set up the mentioned above array. */
5404 static int stop_before_p
= 0;
5406 /* The following variable value is length of the arrays `clocks' and
5409 static int clocks_length
;
5411 /* The following array element values are cycles on which the
5412 corresponding insn will be issued. The array is used only for
5417 /* The following array element values are numbers of cycles should be
5418 added to improve insn scheduling for MM_insns for Itanium1. */
5420 static int *add_cycles
;
5422 static rtx
ia64_single_set (rtx
);
5423 static void ia64_emit_insn_before (rtx
, rtx
);
5425 /* Map a bundle number to its pseudo-op. */
5428 get_bundle_name (int b
)
5430 return bundle_name
[b
];
/* Return the maximum number of instructions a cpu can issue.
   NOTE(review): the body line was lost in this extraction; the value 6
   (two bundles of three slots per cycle on Itanium) is the historical
   GCC value — confirm against the repository copy.  */
static int
ia64_issue_rate (void)
{
  return 6;
}
5442 /* Helper function - like single_set, but look inside COND_EXEC. */
5445 ia64_single_set (rtx insn
)
5447 rtx x
= PATTERN (insn
), ret
;
5448 if (GET_CODE (x
) == COND_EXEC
)
5449 x
= COND_EXEC_CODE (x
);
5450 if (GET_CODE (x
) == SET
)
5453 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5454 Although they are not classical single set, the second set is there just
5455 to protect it from moving past FP-relative stack accesses. */
5456 switch (recog_memoized (insn
))
5458 case CODE_FOR_prologue_allocate_stack
:
5459 case CODE_FOR_epilogue_deallocate_stack
:
5460 ret
= XVECEXP (x
, 0, 0);
5464 ret
= single_set_2 (insn
, x
);
5471 /* Adjust the cost of a scheduling dependency. Return the new cost of
5472 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5475 ia64_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
5477 enum attr_itanium_class dep_class
;
5478 enum attr_itanium_class insn_class
;
5480 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
)
5483 insn_class
= ia64_safe_itanium_class (insn
);
5484 dep_class
= ia64_safe_itanium_class (dep_insn
);
5485 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
5486 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
5492 /* Like emit_insn_before, but skip cycle_display notes.
5493 ??? When cycle display notes are implemented, update this. */
5496 ia64_emit_insn_before (rtx insn
, rtx before
)
5498 emit_insn_before (insn
, before
);
5501 /* The following function marks insns who produce addresses for load
5502 and store insns. Such insns will be placed into M slots because it
5503 decrease latency time for Itanium1 (see function
5504 `ia64_produce_address_p' and the DFA descriptions). */
5507 ia64_dependencies_evaluation_hook (rtx head
, rtx tail
)
5509 rtx insn
, link
, next
, next_tail
;
5511 next_tail
= NEXT_INSN (tail
);
5512 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
5515 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
5517 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
5519 for (link
= INSN_DEPEND (insn
); link
!= 0; link
= XEXP (link
, 1))
5521 next
= XEXP (link
, 0);
5522 if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_ST
5523 || ia64_safe_itanium_class (next
) == ITANIUM_CLASS_STF
)
5524 && ia64_st_address_bypass_p (insn
, next
))
5526 else if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_LD
5527 || ia64_safe_itanium_class (next
)
5528 == ITANIUM_CLASS_FLD
)
5529 && ia64_ld_address_bypass_p (insn
, next
))
5532 insn
->call
= link
!= 0;
5536 /* We're beginning a new block. Initialize data structures as necessary. */
5539 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
5540 int sched_verbose ATTRIBUTE_UNUSED
,
5541 int max_ready ATTRIBUTE_UNUSED
)
5543 #ifdef ENABLE_CHECKING
5546 if (reload_completed
)
5547 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
5548 insn
!= current_sched_info
->next_tail
;
5549 insn
= NEXT_INSN (insn
))
5550 if (SCHED_GROUP_P (insn
))
5553 last_scheduled_insn
= NULL_RTX
;
5554 init_insn_group_barriers ();
5557 /* We are about to being issuing insns for this clock cycle.
5558 Override the default sort algorithm to better slot instructions. */
5561 ia64_dfa_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
,
5562 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
,
5566 int n_ready
= *pn_ready
;
5567 rtx
*e_ready
= ready
+ n_ready
;
5571 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
5573 if (reorder_type
== 0)
5575 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5577 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5578 if (insnp
< e_ready
)
5581 enum attr_type t
= ia64_safe_type (insn
);
5582 if (t
== TYPE_UNKNOWN
)
5584 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
5585 || asm_noperands (PATTERN (insn
)) >= 0)
5587 rtx lowest
= ready
[n_asms
];
5588 ready
[n_asms
] = insn
;
5594 rtx highest
= ready
[n_ready
- 1];
5595 ready
[n_ready
- 1] = insn
;
5602 if (n_asms
< n_ready
)
5604 /* Some normal insns to process. Skip the asms. */
5608 else if (n_ready
> 0)
5612 if (ia64_final_schedule
)
5615 int nr_need_stop
= 0;
5617 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5618 if (safe_group_barrier_needed_p (*insnp
))
5621 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
5623 if (reorder_type
== 0)
5626 /* Move down everything that needs a stop bit, preserving
5628 while (insnp
-- > ready
+ deleted
)
5629 while (insnp
>= ready
+ deleted
)
5632 if (! safe_group_barrier_needed_p (insn
))
5634 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
5645 /* We are about to being issuing insns for this clock cycle. Override
5646 the default sort algorithm to better slot instructions. */
5649 ia64_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
5652 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
5653 pn_ready
, clock_var
, 0);
5656 /* Like ia64_sched_reorder, but called after issuing each insn.
5657 Override the default sort algorithm to better slot instructions. */
5660 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
5661 int sched_verbose ATTRIBUTE_UNUSED
, rtx
*ready
,
5662 int *pn_ready
, int clock_var
)
5664 if (ia64_tune
== PROCESSOR_ITANIUM
&& reload_completed
&& last_scheduled_insn
)
5665 clocks
[INSN_UID (last_scheduled_insn
)] = clock_var
;
5666 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
5670 /* We are about to issue INSN. Return the number of insns left on the
5671 ready queue that can be issued this cycle. */
5674 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
5675 int sched_verbose ATTRIBUTE_UNUSED
,
5676 rtx insn ATTRIBUTE_UNUSED
,
5677 int can_issue_more ATTRIBUTE_UNUSED
)
5679 last_scheduled_insn
= insn
;
5680 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
5681 if (reload_completed
)
5683 if (group_barrier_needed_p (insn
))
5685 if (GET_CODE (insn
) == CALL_INSN
)
5686 init_insn_group_barriers ();
5687 stops_p
[INSN_UID (insn
)] = stop_before_p
;
5693 /* We are choosing insn from the ready queue. Return nonzero if INSN
5697 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn
)
5699 if (insn
== NULL_RTX
|| !INSN_P (insn
))
5701 return (!reload_completed
5702 || !safe_group_barrier_needed_p (insn
));
5705 /* The following variable value is pseudo-insn used by the DFA insn
5706 scheduler to change the DFA state when the simulated clock is
5709 static rtx dfa_pre_cycle_insn
;
5711 /* We are about to being issuing INSN. Return nonzero if we cannot
5712 issue it on given cycle CLOCK and return zero if we should not sort
5713 the ready queue on the next clock start. */
5716 ia64_dfa_new_cycle (FILE *dump
, int verbose
, rtx insn
, int last_clock
,
5717 int clock
, int *sort_p
)
5719 int setup_clocks_p
= FALSE
;
5721 if (insn
== NULL_RTX
|| !INSN_P (insn
))
5723 if ((reload_completed
&& safe_group_barrier_needed_p (insn
))
5724 || (last_scheduled_insn
5725 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
5726 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
5727 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
5729 init_insn_group_barriers ();
5730 if (verbose
&& dump
)
5731 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
5732 last_clock
== clock
? " + cycle advance" : "");
5734 if (last_clock
== clock
)
5736 state_transition (curr_state
, dfa_stop_insn
);
5737 if (TARGET_EARLY_STOP_BITS
)
5738 *sort_p
= (last_scheduled_insn
== NULL_RTX
5739 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
5744 else if (reload_completed
)
5745 setup_clocks_p
= TRUE
;
5746 if (GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
5747 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)
5748 state_reset (curr_state
);
5751 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
5752 state_transition (curr_state
, dfa_stop_insn
);
5753 state_transition (curr_state
, dfa_pre_cycle_insn
);
5754 state_transition (curr_state
, NULL
);
5757 else if (reload_completed
)
5758 setup_clocks_p
= TRUE
;
5759 if (setup_clocks_p
&& ia64_tune
== PROCESSOR_ITANIUM
5760 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
5761 && asm_noperands (PATTERN (insn
)) < 0)
5763 enum attr_itanium_class c
= ia64_safe_itanium_class (insn
);
5765 if (c
!= ITANIUM_CLASS_MMMUL
&& c
!= ITANIUM_CLASS_MMSHF
)
5770 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
5771 if (REG_NOTE_KIND (link
) == 0)
5773 enum attr_itanium_class dep_class
;
5774 rtx dep_insn
= XEXP (link
, 0);
5776 dep_class
= ia64_safe_itanium_class (dep_insn
);
5777 if ((dep_class
== ITANIUM_CLASS_MMMUL
5778 || dep_class
== ITANIUM_CLASS_MMSHF
)
5779 && last_clock
- clocks
[INSN_UID (dep_insn
)] < 4
5781 || last_clock
- clocks
[INSN_UID (dep_insn
)] < d
))
5782 d
= last_clock
- clocks
[INSN_UID (dep_insn
)];
5785 add_cycles
[INSN_UID (insn
)] = 3 - d
;
5793 /* The following page contains abstract data `bundle states' which are
5794 used for bundling insns (inserting nops and template generation). */
5796 /* The following describes state of insn bundling. */
5800 /* Unique bundle state number to identify them in the debugging
5803 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
5804 /* number nops before and after the insn */
5805 short before_nops_num
, after_nops_num
;
5806 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
5808 int cost
; /* cost of the state in cycles */
5809 int accumulated_insns_num
; /* number of all previous insns including
5810 nops. L is considered as 2 insns */
5811 int branch_deviation
; /* deviation of previous branches from 3rd slots */
5812 struct bundle_state
*next
; /* next state with the same insn_num */
5813 struct bundle_state
*originator
; /* originator (previous insn state) */
5814 /* All bundle states are in the following chain. */
5815 struct bundle_state
*allocated_states_chain
;
5816 /* The DFA State after issuing the insn and the nops. */
5820 /* The following is map insn number to the corresponding bundle state. */
5822 static struct bundle_state
**index_to_bundle_states
;
5824 /* The unique number of next bundle state. */
5826 static int bundle_states_num
;
5828 /* All allocated bundle states are in the following chain. */
5830 static struct bundle_state
*allocated_bundle_states_chain
;
5832 /* All allocated but not used bundle states are in the following
5835 static struct bundle_state
*free_bundle_state_chain
;
5838 /* The following function returns a free bundle state. */
5840 static struct bundle_state
*
5841 get_free_bundle_state (void)
5843 struct bundle_state
*result
;
5845 if (free_bundle_state_chain
!= NULL
)
5847 result
= free_bundle_state_chain
;
5848 free_bundle_state_chain
= result
->next
;
5852 result
= xmalloc (sizeof (struct bundle_state
));
5853 result
->dfa_state
= xmalloc (dfa_state_size
);
5854 result
->allocated_states_chain
= allocated_bundle_states_chain
;
5855 allocated_bundle_states_chain
= result
;
5857 result
->unique_num
= bundle_states_num
++;
5862 /* The following function frees given bundle state. */
5865 free_bundle_state (struct bundle_state
*state
)
5867 state
->next
= free_bundle_state_chain
;
5868 free_bundle_state_chain
= state
;
5871 /* Start work with abstract data `bundle states'. */
5874 initiate_bundle_states (void)
5876 bundle_states_num
= 0;
5877 free_bundle_state_chain
= NULL
;
5878 allocated_bundle_states_chain
= NULL
;
5881 /* Finish work with abstract data `bundle states'. */
5884 finish_bundle_states (void)
5886 struct bundle_state
*curr_state
, *next_state
;
5888 for (curr_state
= allocated_bundle_states_chain
;
5890 curr_state
= next_state
)
5892 next_state
= curr_state
->allocated_states_chain
;
5893 free (curr_state
->dfa_state
);
5898 /* Hash table of the bundle states. The key is dfa_state and insn_num
5899 of the bundle states. */
5901 static htab_t bundle_state_table
;
5903 /* The function returns hash of BUNDLE_STATE. */
5906 bundle_state_hash (const void *bundle_state
)
5908 const struct bundle_state
*state
= (struct bundle_state
*) bundle_state
;
5911 for (result
= i
= 0; i
< dfa_state_size
; i
++)
5912 result
+= (((unsigned char *) state
->dfa_state
) [i
]
5913 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
5914 return result
+ state
->insn_num
;
5917 /* The function returns nonzero if the bundle state keys are equal. */
5920 bundle_state_eq_p (const void *bundle_state_1
, const void *bundle_state_2
)
5922 const struct bundle_state
* state1
= (struct bundle_state
*) bundle_state_1
;
5923 const struct bundle_state
* state2
= (struct bundle_state
*) bundle_state_2
;
5925 return (state1
->insn_num
== state2
->insn_num
5926 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
5927 dfa_state_size
) == 0);
5930 /* The function inserts the BUNDLE_STATE into the hash table. The
5931 function returns nonzero if the bundle has been inserted into the
5932 table. The table contains the best bundle state with given key. */
5935 insert_bundle_state (struct bundle_state
*bundle_state
)
5939 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, 1);
5940 if (*entry_ptr
== NULL
)
5942 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
5943 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
5944 *entry_ptr
= (void *) bundle_state
;
5947 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
5948 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
5949 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
5950 > bundle_state
->accumulated_insns_num
5951 || (((struct bundle_state
*)
5952 *entry_ptr
)->accumulated_insns_num
5953 == bundle_state
->accumulated_insns_num
5954 && ((struct bundle_state
*)
5955 *entry_ptr
)->branch_deviation
5956 > bundle_state
->branch_deviation
))))
5959 struct bundle_state temp
;
5961 temp
= *(struct bundle_state
*) *entry_ptr
;
5962 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
5963 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
5964 *bundle_state
= temp
;
5969 /* Start work with the hash table. */
5972 initiate_bundle_state_table (void)
5974 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
5978 /* Finish work with the hash table. */
5981 finish_bundle_state_table (void)
5983 htab_delete (bundle_state_table
);
5988 /* The following variable is a insn `nop' used to check bundle states
5989 with different number of inserted nops. */
5991 static rtx ia64_nop
;
5993 /* The following function tries to issue NOPS_NUM nops for the current
5994 state without advancing processor cycle. If it failed, the
5995 function returns FALSE and frees the current state. */
5998 try_issue_nops (struct bundle_state
*curr_state
, int nops_num
)
6002 for (i
= 0; i
< nops_num
; i
++)
6003 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
6005 free_bundle_state (curr_state
);
6011 /* The following function tries to issue INSN for the current
6012 state without advancing processor cycle. If it failed, the
6013 function returns FALSE and frees the current state. */
6016 try_issue_insn (struct bundle_state
*curr_state
, rtx insn
)
6018 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
6020 free_bundle_state (curr_state
);
6026 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6027 starting with ORIGINATOR without advancing processor cycle. If
6028 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6029 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
6030 If it was successful, the function creates new bundle state and
6031 insert into the hash table and into `index_to_bundle_states'. */
6034 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
6035 rtx insn
, int try_bundle_end_p
, int only_bundle_end_p
)
6037 struct bundle_state
*curr_state
;
6039 curr_state
= get_free_bundle_state ();
6040 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
6041 curr_state
->insn
= insn
;
6042 curr_state
->insn_num
= originator
->insn_num
+ 1;
6043 curr_state
->cost
= originator
->cost
;
6044 curr_state
->originator
= originator
;
6045 curr_state
->before_nops_num
= before_nops_num
;
6046 curr_state
->after_nops_num
= 0;
6047 curr_state
->accumulated_insns_num
6048 = originator
->accumulated_insns_num
+ before_nops_num
;
6049 curr_state
->branch_deviation
= originator
->branch_deviation
;
6050 if (insn
== NULL_RTX
)
6052 else if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
6054 if (GET_MODE (insn
) == TImode
)
6056 if (!try_issue_nops (curr_state
, before_nops_num
))
6058 if (!try_issue_insn (curr_state
, insn
))
6060 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
6061 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
6062 && curr_state
->accumulated_insns_num
% 3 != 0)
6064 free_bundle_state (curr_state
);
6068 else if (GET_MODE (insn
) != TImode
)
6070 if (!try_issue_nops (curr_state
, before_nops_num
))
6072 if (!try_issue_insn (curr_state
, insn
))
6074 curr_state
->accumulated_insns_num
++;
6075 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6076 || asm_noperands (PATTERN (insn
)) >= 0)
6078 if (ia64_safe_type (insn
) == TYPE_L
)
6079 curr_state
->accumulated_insns_num
++;
6083 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
6084 state_transition (curr_state
->dfa_state
, NULL
);
6086 if (!try_issue_nops (curr_state
, before_nops_num
))
6088 if (!try_issue_insn (curr_state
, insn
))
6090 curr_state
->accumulated_insns_num
++;
6091 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6092 || asm_noperands (PATTERN (insn
)) >= 0)
6094 /* Finish bundle containing asm insn. */
6095 curr_state
->after_nops_num
6096 = 3 - curr_state
->accumulated_insns_num
% 3;
6097 curr_state
->accumulated_insns_num
6098 += 3 - curr_state
->accumulated_insns_num
% 3;
6100 else if (ia64_safe_type (insn
) == TYPE_L
)
6101 curr_state
->accumulated_insns_num
++;
6103 if (ia64_safe_type (insn
) == TYPE_B
)
6104 curr_state
->branch_deviation
6105 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
6106 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
6108 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
6111 struct bundle_state
*curr_state1
;
6112 struct bundle_state
*allocated_states_chain
;
6114 curr_state1
= get_free_bundle_state ();
6115 dfa_state
= curr_state1
->dfa_state
;
6116 allocated_states_chain
= curr_state1
->allocated_states_chain
;
6117 *curr_state1
= *curr_state
;
6118 curr_state1
->dfa_state
= dfa_state
;
6119 curr_state1
->allocated_states_chain
= allocated_states_chain
;
6120 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
6122 curr_state
= curr_state1
;
6124 if (!try_issue_nops (curr_state
,
6125 3 - curr_state
->accumulated_insns_num
% 3))
6127 curr_state
->after_nops_num
6128 = 3 - curr_state
->accumulated_insns_num
% 3;
6129 curr_state
->accumulated_insns_num
6130 += 3 - curr_state
->accumulated_insns_num
% 3;
6132 if (!insert_bundle_state (curr_state
))
6133 free_bundle_state (curr_state
);
6137 /* The following function returns position in the two window bundle
6141 get_max_pos (state_t state
)
6143 if (cpu_unit_reservation_p (state
, pos_6
))
6145 else if (cpu_unit_reservation_p (state
, pos_5
))
6147 else if (cpu_unit_reservation_p (state
, pos_4
))
6149 else if (cpu_unit_reservation_p (state
, pos_3
))
6151 else if (cpu_unit_reservation_p (state
, pos_2
))
6153 else if (cpu_unit_reservation_p (state
, pos_1
))
6159 /* The function returns code of a possible template for given position
6160 and state. The function should be called only with 2 values of
6161 position equal to 3 or 6. */
6164 get_template (state_t state
, int pos
)
6169 if (cpu_unit_reservation_p (state
, _0mii_
))
6171 else if (cpu_unit_reservation_p (state
, _0mmi_
))
6173 else if (cpu_unit_reservation_p (state
, _0mfi_
))
6175 else if (cpu_unit_reservation_p (state
, _0mmf_
))
6177 else if (cpu_unit_reservation_p (state
, _0bbb_
))
6179 else if (cpu_unit_reservation_p (state
, _0mbb_
))
6181 else if (cpu_unit_reservation_p (state
, _0mib_
))
6183 else if (cpu_unit_reservation_p (state
, _0mmb_
))
6185 else if (cpu_unit_reservation_p (state
, _0mfb_
))
6187 else if (cpu_unit_reservation_p (state
, _0mlx_
))
6192 if (cpu_unit_reservation_p (state
, _1mii_
))
6194 else if (cpu_unit_reservation_p (state
, _1mmi_
))
6196 else if (cpu_unit_reservation_p (state
, _1mfi_
))
6198 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
6200 else if (cpu_unit_reservation_p (state
, _1bbb_
))
6202 else if (cpu_unit_reservation_p (state
, _1mbb_
))
6204 else if (cpu_unit_reservation_p (state
, _1mib_
))
6206 else if (cpu_unit_reservation_p (state
, _1mmb_
))
6208 else if (cpu_unit_reservation_p (state
, _1mfb_
))
6210 else if (cpu_unit_reservation_p (state
, _1mlx_
))
6219 /* The following function returns an insn important for insn bundling
6220 followed by INSN and before TAIL. */
6223 get_next_important_insn (rtx insn
, rtx tail
)
6225 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
6227 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6228 && GET_CODE (PATTERN (insn
)) != USE
6229 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
6234 /* The following function does insn bundling. Bundling means
6235 inserting templates and nop insns to fit insn groups into permitted
6236 templates. Instruction scheduling uses NDFA (non-deterministic
6237 finite automata) encoding informations about the templates and the
6238 inserted nops. Nondeterminism of the automata permits follows
6239 all possible insn sequences very fast.
6241 Unfortunately it is not possible to get information about inserting
6242 nop insns and used templates from the automata states. The
6243 automata only says that we can issue an insn possibly inserting
6244 some nops before it and using some template. Therefore insn
6245 bundling in this function is implemented by using DFA
6246 (deterministic finite automata). We follows all possible insn
6247 sequences by inserting 0-2 nops (that is what the NDFA describe for
6248 insn scheduling) before/after each insn being bundled. We know the
6249 start of simulated processor cycle from insn scheduling (insn
6250 starting a new cycle has TImode).
6252 Simple implementation of insn bundling would create enormous
6253 number of possible insn sequences satisfying information about new
6254 cycle ticks taken from the insn scheduling. To make the algorithm
6255 practical we use dynamic programming. Each decision (about
6256 inserting nops and implicitly about previous decisions) is described
6257 by structure bundle_state (see above). If we generate the same
6258 bundle state (key is automaton state after issuing the insns and
6259 nops for it), we reuse already generated one. As consequence we
6260 reject some decisions which cannot improve the solution and
6261 reduce memory for the algorithm.
6263 When we reach the end of EBB (extended basic block), we choose the
6264 best sequence and then, moving back in EBB, insert templates for
6265 the best alternative. The templates are taken from querying
6266 automaton state for each insn in chosen bundle states.
6268 So the algorithm makes two (forward and backward) passes through
6269 EBB. There is an additional forward pass through EBB for Itanium1
6270 processor. This pass inserts more nops to make dependency between
6271 a producer insn and MMMUL/MMSHF at least 4 cycles long. */
6274 bundling (FILE *dump
, int verbose
, rtx prev_head_insn
, rtx tail
)
6276 struct bundle_state
*curr_state
, *next_state
, *best_state
;
6277 rtx insn
, next_insn
;
6279 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
6280 int pos
= 0, max_pos
, template0
, template1
;
6283 enum attr_type type
;
6286 /* Count insns in the EBB. */
6287 for (insn
= NEXT_INSN (prev_head_insn
);
6288 insn
&& insn
!= tail
;
6289 insn
= NEXT_INSN (insn
))
6295 dfa_clean_insn_cache ();
6296 initiate_bundle_state_table ();
6297 index_to_bundle_states
= xmalloc ((insn_num
+ 2)
6298 * sizeof (struct bundle_state
*));
6299 /* First (forward) pass -- generation of bundle states. */
6300 curr_state
= get_free_bundle_state ();
6301 curr_state
->insn
= NULL
;
6302 curr_state
->before_nops_num
= 0;
6303 curr_state
->after_nops_num
= 0;
6304 curr_state
->insn_num
= 0;
6305 curr_state
->cost
= 0;
6306 curr_state
->accumulated_insns_num
= 0;
6307 curr_state
->branch_deviation
= 0;
6308 curr_state
->next
= NULL
;
6309 curr_state
->originator
= NULL
;
6310 state_reset (curr_state
->dfa_state
);
6311 index_to_bundle_states
[0] = curr_state
;
6313 /* Shift cycle mark if it is put on insn which could be ignored. */
6314 for (insn
= NEXT_INSN (prev_head_insn
);
6316 insn
= NEXT_INSN (insn
))
6318 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6319 || GET_CODE (PATTERN (insn
)) == USE
6320 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6321 && GET_MODE (insn
) == TImode
)
6323 PUT_MODE (insn
, VOIDmode
);
6324 for (next_insn
= NEXT_INSN (insn
);
6326 next_insn
= NEXT_INSN (next_insn
))
6327 if (INSN_P (next_insn
)
6328 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
6329 && GET_CODE (PATTERN (next_insn
)) != USE
6330 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
6332 PUT_MODE (next_insn
, TImode
);
6336 /* Froward pass: generation of bundle states. */
6337 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6342 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6343 || GET_CODE (PATTERN (insn
)) == USE
6344 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6346 type
= ia64_safe_type (insn
);
6347 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6349 index_to_bundle_states
[insn_num
] = NULL
;
6350 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
6352 curr_state
= next_state
)
6354 pos
= curr_state
->accumulated_insns_num
% 3;
6355 next_state
= curr_state
->next
;
6356 /* We must fill up the current bundle in order to start a
6357 subsequent asm insn in a new bundle. Asm insn is always
6358 placed in a separate bundle. */
6360 = (next_insn
!= NULL_RTX
6361 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
6362 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
6363 /* We may fill up the current bundle if it is the cycle end
6364 without a group barrier. */
6366 = (only_bundle_end_p
|| next_insn
== NULL_RTX
6367 || (GET_MODE (next_insn
) == TImode
6368 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
6369 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
6371 /* We need to insert 2 nops for cases like M_MII. To
6372 guarantee issuing all insns on the same cycle for
6373 Itanium 1, we need to issue 2 nops after the first M
6374 insn (MnnMII where n is a nop insn). */
6375 || ((type
== TYPE_M
|| type
== TYPE_A
)
6376 && ia64_tune
== PROCESSOR_ITANIUM
6377 && !bundle_end_p
&& pos
== 1))
6378 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
6380 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
6382 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
6385 if (index_to_bundle_states
[insn_num
] == NULL
)
6387 for (curr_state
= index_to_bundle_states
[insn_num
];
6389 curr_state
= curr_state
->next
)
6390 if (verbose
>= 2 && dump
)
6392 /* This structure is taken from generated code of the
6393 pipeline hazard recognizer (see file insn-attrtab.c).
6394 Please don't forget to change the structure if a new
6395 automaton is added to .md file. */
6398 unsigned short one_automaton_state
;
6399 unsigned short oneb_automaton_state
;
6400 unsigned short two_automaton_state
;
6401 unsigned short twob_automaton_state
;
6406 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6407 curr_state
->unique_num
,
6408 (curr_state
->originator
== NULL
6409 ? -1 : curr_state
->originator
->unique_num
),
6411 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6412 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6413 (ia64_tune
== PROCESSOR_ITANIUM
6414 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6415 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6419 if (index_to_bundle_states
[insn_num
] == NULL
)
6420 /* We should find a solution because the 2nd insn scheduling has
6423 /* Find a state corresponding to the best insn sequence. */
6425 for (curr_state
= index_to_bundle_states
[insn_num
];
6427 curr_state
= curr_state
->next
)
6428 /* We are just looking at the states with fully filled up last
6429 bundle. The first we prefer insn sequences with minimal cost
6430 then with minimal inserted nops and finally with branch insns
6431 placed in the 3rd slots. */
6432 if (curr_state
->accumulated_insns_num
% 3 == 0
6433 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
6434 || (best_state
->cost
== curr_state
->cost
6435 && (curr_state
->accumulated_insns_num
6436 < best_state
->accumulated_insns_num
6437 || (curr_state
->accumulated_insns_num
6438 == best_state
->accumulated_insns_num
6439 && curr_state
->branch_deviation
6440 < best_state
->branch_deviation
)))))
6441 best_state
= curr_state
;
6442 /* Second (backward) pass: adding nops and templates. */
6443 insn_num
= best_state
->before_nops_num
;
6444 template0
= template1
= -1;
6445 for (curr_state
= best_state
;
6446 curr_state
->originator
!= NULL
;
6447 curr_state
= curr_state
->originator
)
6449 insn
= curr_state
->insn
;
6450 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6451 || asm_noperands (PATTERN (insn
)) >= 0);
6453 if (verbose
>= 2 && dump
)
6457 unsigned short one_automaton_state
;
6458 unsigned short oneb_automaton_state
;
6459 unsigned short two_automaton_state
;
6460 unsigned short twob_automaton_state
;
6465 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6466 curr_state
->unique_num
,
6467 (curr_state
->originator
== NULL
6468 ? -1 : curr_state
->originator
->unique_num
),
6470 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6471 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6472 (ia64_tune
== PROCESSOR_ITANIUM
6473 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6474 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6477 /* Find the position in the current bundle window. The window can
6478 contain at most two bundles. Two bundle window means that
6479 the processor will make two bundle rotation. */
6480 max_pos
= get_max_pos (curr_state
->dfa_state
);
6482 /* The following (negative template number) means that the
6483 processor did one bundle rotation. */
6484 || (max_pos
== 3 && template0
< 0))
6486 /* We are at the end of the window -- find template(s) for
6490 template0
= get_template (curr_state
->dfa_state
, 3);
6493 template1
= get_template (curr_state
->dfa_state
, 3);
6494 template0
= get_template (curr_state
->dfa_state
, 6);
6497 if (max_pos
> 3 && template1
< 0)
6498 /* It may happen when we have the stop inside a bundle. */
6502 template1
= get_template (curr_state
->dfa_state
, 3);
6506 /* Emit nops after the current insn. */
6507 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
6510 emit_insn_after (nop
, insn
);
6516 /* We are at the start of a bundle: emit the template
6517 (it should be defined). */
6520 b
= gen_bundle_selector (GEN_INT (template0
));
6521 ia64_emit_insn_before (b
, nop
);
6522 /* If we have two bundle window, we make one bundle
6523 rotation. Otherwise template0 will be undefined
6524 (negative value). */
6525 template0
= template1
;
6529 /* Move the position backward in the window. Group barrier has
6530 no slot. Asm insn takes all bundle. */
6531 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6532 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6533 && asm_noperands (PATTERN (insn
)) < 0)
6535 /* Long insn takes 2 slots. */
6536 if (ia64_safe_type (insn
) == TYPE_L
)
6541 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6542 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6543 && asm_noperands (PATTERN (insn
)) < 0)
6545 /* The current insn is at the bundle start: emit the
6549 b
= gen_bundle_selector (GEN_INT (template0
));
6550 ia64_emit_insn_before (b
, insn
);
6551 b
= PREV_INSN (insn
);
6553 /* See comment above in analogous place for emitting nops
6555 template0
= template1
;
6558 /* Emit nops after the current insn. */
6559 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
6562 ia64_emit_insn_before (nop
, insn
);
6563 nop
= PREV_INSN (insn
);
6570 /* See comment above in analogous place for emitting nops
6574 b
= gen_bundle_selector (GEN_INT (template0
));
6575 ia64_emit_insn_before (b
, insn
);
6576 b
= PREV_INSN (insn
);
6578 template0
= template1
;
6583 if (ia64_tune
== PROCESSOR_ITANIUM
)
6584 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
6585 Itanium1 has a strange design, if the distance between an insn
6586 and dependent MM-insn is less 4 then we have a 6 additional
6587 cycles stall. So we make the distance equal to 4 cycles if it
6589 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6594 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6595 || GET_CODE (PATTERN (insn
)) == USE
6596 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6598 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6599 if (INSN_UID (insn
) < clocks_length
&& add_cycles
[INSN_UID (insn
)])
6600 /* We found a MM-insn which needs additional cycles. */
6606 /* Now we are searching for a template of the bundle in
6607 which the MM-insn is placed and the position of the
6608 insn in the bundle (0, 1, 2). Also we are searching
6609 for that there is a stop before the insn. */
6610 last
= prev_active_insn (insn
);
6611 pred_stop_p
= recog_memoized (last
) == CODE_FOR_insn_group_barrier
;
6613 last
= prev_active_insn (last
);
6615 for (;; last
= prev_active_insn (last
))
6616 if (recog_memoized (last
) == CODE_FOR_bundle_selector
)
6618 template0
= XINT (XVECEXP (PATTERN (last
), 0, 0), 0);
6620 /* The insn is in MLX bundle. Change the template
6621 onto MFI because we will add nops before the
6622 insn. It simplifies subsequent code a lot. */
6624 = gen_bundle_selector (const2_rtx
); /* -> MFI */
6627 else if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
6628 && (ia64_safe_itanium_class (last
)
6629 != ITANIUM_CLASS_IGNORE
))
6631 /* Some check of correctness: the stop is not at the
6632 bundle start, there are no more 3 insns in the bundle,
6633 and the MM-insn is not at the start of bundle with
6635 if ((pred_stop_p
&& n
== 0) || n
> 2
6636 || (template0
== 9 && n
!= 0))
6638 /* Put nops after the insn in the bundle. */
6639 for (j
= 3 - n
; j
> 0; j
--)
6640 ia64_emit_insn_before (gen_nop (), insn
);
6641 /* It takes into account that we will add more N nops
6642 before the insn lately -- please see code below. */
6643 add_cycles
[INSN_UID (insn
)]--;
6644 if (!pred_stop_p
|| add_cycles
[INSN_UID (insn
)])
6645 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6648 add_cycles
[INSN_UID (insn
)]--;
6649 for (i
= add_cycles
[INSN_UID (insn
)]; i
> 0; i
--)
6651 /* Insert "MII;" template. */
6652 ia64_emit_insn_before (gen_bundle_selector (const0_rtx
),
6654 ia64_emit_insn_before (gen_nop (), insn
);
6655 ia64_emit_insn_before (gen_nop (), insn
);
6658 /* To decrease code size, we use "MI;I;"
6660 ia64_emit_insn_before
6661 (gen_insn_group_barrier (GEN_INT (3)), insn
);
6664 ia64_emit_insn_before (gen_nop (), insn
);
6665 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6668 /* Put the MM-insn in the same slot of a bundle with the
6669 same template as the original one. */
6670 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0
)),
6672 /* To put the insn in the same slot, add necessary number
6674 for (j
= n
; j
> 0; j
--)
6675 ia64_emit_insn_before (gen_nop (), insn
);
6676 /* Put the stop if the original bundle had it. */
6678 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6682 free (index_to_bundle_states
);
6683 finish_bundle_state_table ();
6685 dfa_clean_insn_cache ();
6688 /* The following function is called at the end of scheduling BB or
6689 EBB. After reload, it inserts stop bits and does insn bundling. */
6692 ia64_sched_finish (FILE *dump
, int sched_verbose
)
6695 fprintf (dump
, "// Finishing schedule.\n");
6696 if (!reload_completed
)
6698 if (reload_completed
)
6700 final_emit_insn_group_barriers (dump
);
6701 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
6702 current_sched_info
->next_tail
);
6703 if (sched_verbose
&& dump
)
6704 fprintf (dump
, "// finishing %d-%d\n",
6705 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
6706 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
6712 /* The following function inserts stop bits in scheduled BB or EBB. */
6715 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
6718 int need_barrier_p
= 0;
6719 rtx prev_insn
= NULL_RTX
;
6721 init_insn_group_barriers ();
6723 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
6724 insn
!= current_sched_info
->next_tail
;
6725 insn
= NEXT_INSN (insn
))
6727 if (GET_CODE (insn
) == BARRIER
)
6729 rtx last
= prev_active_insn (insn
);
6733 if (GET_CODE (last
) == JUMP_INSN
6734 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
6735 last
= prev_active_insn (last
);
6736 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6737 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
6739 init_insn_group_barriers ();
6741 prev_insn
= NULL_RTX
;
6743 else if (INSN_P (insn
))
6745 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
6747 init_insn_group_barriers ();
6749 prev_insn
= NULL_RTX
;
6751 else if (need_barrier_p
|| group_barrier_needed_p (insn
))
6753 if (TARGET_EARLY_STOP_BITS
)
6758 last
!= current_sched_info
->prev_head
;
6759 last
= PREV_INSN (last
))
6760 if (INSN_P (last
) && GET_MODE (last
) == TImode
6761 && stops_p
[INSN_UID (last
)])
6763 if (last
== current_sched_info
->prev_head
)
6765 last
= prev_active_insn (last
);
6767 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6768 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6770 init_insn_group_barriers ();
6771 for (last
= NEXT_INSN (last
);
6773 last
= NEXT_INSN (last
))
6775 group_barrier_needed_p (last
);
6779 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6781 init_insn_group_barriers ();
6783 group_barrier_needed_p (insn
);
6784 prev_insn
= NULL_RTX
;
6786 else if (recog_memoized (insn
) >= 0)
6788 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
6789 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6790 || asm_noperands (PATTERN (insn
)) >= 0);
6797 /* If the following function returns TRUE, we will use the the DFA
6801 ia64_first_cycle_multipass_dfa_lookahead (void)
6803 return (reload_completed
? 6 : 4);
6806 /* The following function initiates variable `dfa_pre_cycle_insn'. */
6809 ia64_init_dfa_pre_cycle_insn (void)
6811 if (temp_dfa_state
== NULL
)
6813 dfa_state_size
= state_size ();
6814 temp_dfa_state
= xmalloc (dfa_state_size
);
6815 prev_cycle_state
= xmalloc (dfa_state_size
);
6817 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
6818 PREV_INSN (dfa_pre_cycle_insn
) = NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
6819 recog_memoized (dfa_pre_cycle_insn
);
6820 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
6821 PREV_INSN (dfa_stop_insn
) = NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
6822 recog_memoized (dfa_stop_insn
);
6825 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
6826 used by the DFA insn scheduler. */
6829 ia64_dfa_pre_cycle_insn (void)
6831 return dfa_pre_cycle_insn
;
6834 /* The following function returns TRUE if PRODUCER (of type ilog or
6835 ld) produces address for CONSUMER (of type st or stf). */
6838 ia64_st_address_bypass_p (rtx producer
, rtx consumer
)
6842 if (producer
== NULL_RTX
|| consumer
== NULL_RTX
)
6844 dest
= ia64_single_set (producer
);
6845 if (dest
== NULL_RTX
|| (reg
= SET_DEST (dest
)) == NULL_RTX
6846 || (GET_CODE (reg
) != REG
&& GET_CODE (reg
) != SUBREG
))
6848 if (GET_CODE (reg
) == SUBREG
)
6849 reg
= SUBREG_REG (reg
);
6850 dest
= ia64_single_set (consumer
);
6851 if (dest
== NULL_RTX
|| (mem
= SET_DEST (dest
)) == NULL_RTX
6852 || GET_CODE (mem
) != MEM
)
6854 return reg_mentioned_p (reg
, mem
);
6857 /* The following function returns TRUE if PRODUCER (of type ilog or
6858 ld) produces address for CONSUMER (of type ld or fld). */
6861 ia64_ld_address_bypass_p (rtx producer
, rtx consumer
)
6863 rtx dest
, src
, reg
, mem
;
6865 if (producer
== NULL_RTX
|| consumer
== NULL_RTX
)
6867 dest
= ia64_single_set (producer
);
6868 if (dest
== NULL_RTX
|| (reg
= SET_DEST (dest
)) == NULL_RTX
6869 || (GET_CODE (reg
) != REG
&& GET_CODE (reg
) != SUBREG
))
6871 if (GET_CODE (reg
) == SUBREG
)
6872 reg
= SUBREG_REG (reg
);
6873 src
= ia64_single_set (consumer
);
6874 if (src
== NULL_RTX
|| (mem
= SET_SRC (src
)) == NULL_RTX
)
6876 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
6877 mem
= XVECEXP (mem
, 0, 0);
6878 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
6879 mem
= XEXP (mem
, 0);
6881 /* Note that LO_SUM is used for GOT loads. */
6882 if (GET_CODE (mem
) != LO_SUM
&& GET_CODE (mem
) != MEM
)
6885 return reg_mentioned_p (reg
, mem
);
6888 /* The following function returns TRUE if INSN produces address for a
6889 load/store insn. We will place such insns into M slot because it
6890 decreases its latency time. */
6893 ia64_produce_address_p (rtx insn
)
6899 /* Emit pseudo-ops for the assembler to describe predicate relations.
6900 At present this assumes that we only consider predicate pairs to
6901 be mutex, and that the assembler can deduce proper values from
6902 straight-line code. */
6905 emit_predicate_relation_info (void)
6909 FOR_EACH_BB_REVERSE (bb
)
6912 rtx head
= BB_HEAD (bb
);
6914 /* We only need such notes at code labels. */
6915 if (GET_CODE (head
) != CODE_LABEL
)
6917 if (GET_CODE (NEXT_INSN (head
)) == NOTE
6918 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
6919 head
= NEXT_INSN (head
);
6921 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
6922 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
6924 rtx p
= gen_rtx_REG (BImode
, r
);
6925 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
6926 if (head
== BB_END (bb
))
6932 /* Look for conditional calls that do not return, and protect predicate
6933 relations around them. Otherwise the assembler will assume the call
6934 returns, and complain about uses of call-clobbered predicates after
6936 FOR_EACH_BB_REVERSE (bb
)
6938 rtx insn
= BB_HEAD (bb
);
6942 if (GET_CODE (insn
) == CALL_INSN
6943 && GET_CODE (PATTERN (insn
)) == COND_EXEC
6944 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
6946 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
6947 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
6948 if (BB_HEAD (bb
) == insn
)
6950 if (BB_END (bb
) == insn
)
6954 if (insn
== BB_END (bb
))
6956 insn
= NEXT_INSN (insn
);
6961 /* Perform machine dependent operations on the rtl chain INSNS. */
6966 /* We are freeing block_for_insn in the toplev to keep compatibility
6967 with old MDEP_REORGS that are not CFG based. Recompute it now. */
6968 compute_bb_for_insn ();
6970 /* If optimizing, we'll have split before scheduling. */
6972 split_all_insns (0);
6974 /* ??? update_life_info_in_dirty_blocks fails to terminate during
6975 non-optimizing bootstrap. */
6976 update_life_info (NULL
, UPDATE_LIFE_GLOBAL_RM_NOTES
, PROP_DEATH_NOTES
);
6978 if (ia64_flag_schedule_insns2
)
6980 timevar_push (TV_SCHED2
);
6981 ia64_final_schedule
= 1;
6983 initiate_bundle_states ();
6984 ia64_nop
= make_insn_raw (gen_nop ());
6985 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
6986 recog_memoized (ia64_nop
);
6987 clocks_length
= get_max_uid () + 1;
6988 stops_p
= xcalloc (1, clocks_length
);
6989 if (ia64_tune
== PROCESSOR_ITANIUM
)
6991 clocks
= xcalloc (clocks_length
, sizeof (int));
6992 add_cycles
= xcalloc (clocks_length
, sizeof (int));
6994 if (ia64_tune
== PROCESSOR_ITANIUM2
)
6996 pos_1
= get_cpu_unit_code ("2_1");
6997 pos_2
= get_cpu_unit_code ("2_2");
6998 pos_3
= get_cpu_unit_code ("2_3");
6999 pos_4
= get_cpu_unit_code ("2_4");
7000 pos_5
= get_cpu_unit_code ("2_5");
7001 pos_6
= get_cpu_unit_code ("2_6");
7002 _0mii_
= get_cpu_unit_code ("2b_0mii.");
7003 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
7004 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
7005 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
7006 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
7007 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
7008 _0mib_
= get_cpu_unit_code ("2b_0mib.");
7009 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
7010 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
7011 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
7012 _1mii_
= get_cpu_unit_code ("2b_1mii.");
7013 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
7014 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
7015 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
7016 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
7017 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
7018 _1mib_
= get_cpu_unit_code ("2b_1mib.");
7019 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
7020 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
7021 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
7025 pos_1
= get_cpu_unit_code ("1_1");
7026 pos_2
= get_cpu_unit_code ("1_2");
7027 pos_3
= get_cpu_unit_code ("1_3");
7028 pos_4
= get_cpu_unit_code ("1_4");
7029 pos_5
= get_cpu_unit_code ("1_5");
7030 pos_6
= get_cpu_unit_code ("1_6");
7031 _0mii_
= get_cpu_unit_code ("1b_0mii.");
7032 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
7033 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
7034 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
7035 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
7036 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
7037 _0mib_
= get_cpu_unit_code ("1b_0mib.");
7038 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
7039 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
7040 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
7041 _1mii_
= get_cpu_unit_code ("1b_1mii.");
7042 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
7043 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
7044 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
7045 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
7046 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
7047 _1mib_
= get_cpu_unit_code ("1b_1mib.");
7048 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
7049 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
7050 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
7052 schedule_ebbs (dump_file
);
7053 finish_bundle_states ();
7054 if (ia64_tune
== PROCESSOR_ITANIUM
)
7060 emit_insn_group_barriers (dump_file
);
7062 ia64_final_schedule
= 0;
7063 timevar_pop (TV_SCHED2
);
7066 emit_all_insn_group_barriers (dump_file
);
7068 /* A call must not be the last instruction in a function, so that the
7069 return address is still within the function, so that unwinding works
7070 properly. Note that IA-64 differs from dwarf2 on this point. */
7071 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7076 insn
= get_last_insn ();
7077 if (! INSN_P (insn
))
7078 insn
= prev_active_insn (insn
);
7079 /* Skip over insns that expand to nothing. */
7080 while (GET_CODE (insn
) == INSN
&& get_attr_empty (insn
) == EMPTY_YES
)
7082 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
7083 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
7085 insn
= prev_active_insn (insn
);
7087 if (GET_CODE (insn
) == CALL_INSN
)
7090 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7091 emit_insn (gen_break_f ());
7092 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7097 emit_predicate_relation_info ();
7099 if (ia64_flag_var_tracking
)
7101 timevar_push (TV_VAR_TRACKING
);
7102 variable_tracking_main ();
7103 timevar_pop (TV_VAR_TRACKING
);
7107 /* Return true if REGNO is used by the epilogue. */
7110 ia64_epilogue_uses (int regno
)
7115 /* With a call to a function in another module, we will write a new
7116 value to "gp". After returning from such a call, we need to make
7117 sure the function restores the original gp-value, even if the
7118 function itself does not use the gp anymore. */
7119 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
7121 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7122 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7123 /* For functions defined with the syscall_linkage attribute, all
7124 input registers are marked as live at all function exits. This
7125 prevents the register allocator from using the input registers,
7126 which in turn makes it possible to restart a system call after
7127 an interrupt without having to save/restore the input registers.
7128 This also prevents kernel data from leaking to application code. */
7129 return lookup_attribute ("syscall_linkage",
7130 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
7133 /* Conditional return patterns can't represent the use of `b0' as
7134 the return address, so we force the value live this way. */
7138 /* Likewise for ar.pfs, which is used by br.ret. */
7146 /* Return true if REGNO is used by the frame unwinder. */
7149 ia64_eh_uses (int regno
)
7151 if (! reload_completed
)
7154 if (current_frame_info
.reg_save_b0
7155 && regno
== current_frame_info
.reg_save_b0
)
7157 if (current_frame_info
.reg_save_pr
7158 && regno
== current_frame_info
.reg_save_pr
)
7160 if (current_frame_info
.reg_save_ar_pfs
7161 && regno
== current_frame_info
.reg_save_ar_pfs
)
7163 if (current_frame_info
.reg_save_ar_unat
7164 && regno
== current_frame_info
.reg_save_ar_unat
)
7166 if (current_frame_info
.reg_save_ar_lc
7167 && regno
== current_frame_info
.reg_save_ar_lc
)
7173 /* Return true if this goes in small data/bss. */
7175 /* ??? We could also support own long data here. Generating movl/add/ld8
7176 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7177 code faster because there is one less load. This also includes incomplete
7178 types which can't go in sdata/sbss. */
7181 ia64_in_small_data_p (tree exp
)
7183 if (TARGET_NO_SDATA
)
7186 /* We want to merge strings, so we never consider them small data. */
7187 if (TREE_CODE (exp
) == STRING_CST
)
7190 /* Functions are never small data. */
7191 if (TREE_CODE (exp
) == FUNCTION_DECL
)
7194 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
7196 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
7197 if (strcmp (section
, ".sdata") == 0
7198 || strcmp (section
, ".sbss") == 0)
7203 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
7205 /* If this is an incomplete type with size 0, then we can't put it
7206 in sdata because it might be too big when completed. */
7207 if (size
> 0 && size
<= ia64_section_threshold
)
7214 /* Output assembly directives for prologue regions. */
7216 /* The current basic block number. */
7218 static bool last_block
;
7220 /* True if we need a copy_state command at the start of the next block. */
7222 static bool need_copy_state
;
7224 /* The function emits unwind directives for the start of an epilogue. */
7227 process_epilogue (void)
7229 /* If this isn't the last block of the function, then we need to label the
7230 current state, and copy it back in at the start of the next block. */
7234 fprintf (asm_out_file
, "\t.label_state 1\n");
7235 need_copy_state
= true;
7238 fprintf (asm_out_file
, "\t.restore sp\n");
7241 /* This function processes a SET pattern looking for specific patterns
7242 which result in emitting an assembly directive required for unwinding. */
7245 process_set (FILE *asm_out_file
, rtx pat
)
7247 rtx src
= SET_SRC (pat
);
7248 rtx dest
= SET_DEST (pat
);
7249 int src_regno
, dest_regno
;
7251 /* Look for the ALLOC insn. */
7252 if (GET_CODE (src
) == UNSPEC_VOLATILE
7253 && XINT (src
, 1) == UNSPECV_ALLOC
7254 && GET_CODE (dest
) == REG
)
7256 dest_regno
= REGNO (dest
);
7258 /* If this isn't the final destination for ar.pfs, the alloc
7259 shouldn't have been marked frame related. */
7260 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
7263 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7264 ia64_dbx_register_number (dest_regno
));
7268 /* Look for SP = .... */
7269 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7271 if (GET_CODE (src
) == PLUS
)
7273 rtx op0
= XEXP (src
, 0);
7274 rtx op1
= XEXP (src
, 1);
7275 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
7277 if (INTVAL (op1
) < 0)
7278 fprintf (asm_out_file
, "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
7281 process_epilogue ();
7286 else if (GET_CODE (src
) == REG
7287 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
7288 process_epilogue ();
7295 /* Register move we need to look at. */
7296 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7298 src_regno
= REGNO (src
);
7299 dest_regno
= REGNO (dest
);
7304 /* Saving return address pointer. */
7305 if (dest_regno
!= current_frame_info
.reg_save_b0
)
7307 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7308 ia64_dbx_register_number (dest_regno
));
7312 if (dest_regno
!= current_frame_info
.reg_save_pr
)
7314 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7315 ia64_dbx_register_number (dest_regno
));
7318 case AR_UNAT_REGNUM
:
7319 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
7321 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7322 ia64_dbx_register_number (dest_regno
));
7326 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
7328 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7329 ia64_dbx_register_number (dest_regno
));
7332 case STACK_POINTER_REGNUM
:
7333 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
7334 || ! frame_pointer_needed
)
7336 fprintf (asm_out_file
, "\t.vframe r%d\n",
7337 ia64_dbx_register_number (dest_regno
));
7341 /* Everything else should indicate being stored to memory. */
7346 /* Memory store we need to look at. */
7347 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7353 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7355 base
= XEXP (dest
, 0);
7358 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
7359 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
7361 base
= XEXP (XEXP (dest
, 0), 0);
7362 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7367 if (base
== hard_frame_pointer_rtx
)
7369 saveop
= ".savepsp";
7372 else if (base
== stack_pointer_rtx
)
7377 src_regno
= REGNO (src
);
7381 if (current_frame_info
.reg_save_b0
!= 0)
7383 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7387 if (current_frame_info
.reg_save_pr
!= 0)
7389 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7393 if (current_frame_info
.reg_save_ar_lc
!= 0)
7395 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7399 if (current_frame_info
.reg_save_ar_pfs
!= 0)
7401 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7404 case AR_UNAT_REGNUM
:
7405 if (current_frame_info
.reg_save_ar_unat
!= 0)
7407 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7414 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7415 1 << (src_regno
- GR_REG (4)));
7423 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7424 1 << (src_regno
- BR_REG (1)));
7431 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7432 1 << (src_regno
- FR_REG (2)));
7435 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7436 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7437 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7438 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7439 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7440 1 << (src_regno
- FR_REG (12)));
7452 /* This function looks at a single insn and emits any directives
7453 required to unwind this insn. */
7455 process_for_unwind_directive (FILE *asm_out_file
, rtx insn
)
7457 if (flag_unwind_tables
7458 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7462 if (GET_CODE (insn
) == NOTE
7463 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7465 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
7467 /* Restore unwind state from immediately before the epilogue. */
7468 if (need_copy_state
)
7470 fprintf (asm_out_file
, "\t.body\n");
7471 fprintf (asm_out_file
, "\t.copy_state 1\n");
7472 need_copy_state
= false;
7476 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
7479 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7481 pat
= XEXP (pat
, 0);
7483 pat
= PATTERN (insn
);
7485 switch (GET_CODE (pat
))
7488 process_set (asm_out_file
, pat
);
7494 int limit
= XVECLEN (pat
, 0);
7495 for (par_index
= 0; par_index
< limit
; par_index
++)
7497 rtx x
= XVECEXP (pat
, 0, par_index
);
7498 if (GET_CODE (x
) == SET
)
7499 process_set (asm_out_file
, x
);
7512 ia64_init_builtins (void)
7514 tree psi_type_node
= build_pointer_type (integer_type_node
);
7515 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
7517 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7518 tree si_ftype_psi_si_si
7519 = build_function_type_list (integer_type_node
,
7520 psi_type_node
, integer_type_node
,
7521 integer_type_node
, NULL_TREE
);
7523 /* __sync_val_compare_and_swap_di */
7524 tree di_ftype_pdi_di_di
7525 = build_function_type_list (long_integer_type_node
,
7526 pdi_type_node
, long_integer_type_node
,
7527 long_integer_type_node
, NULL_TREE
);
7528 /* __sync_bool_compare_and_swap_di */
7529 tree si_ftype_pdi_di_di
7530 = build_function_type_list (integer_type_node
,
7531 pdi_type_node
, long_integer_type_node
,
7532 long_integer_type_node
, NULL_TREE
);
7533 /* __sync_synchronize */
7534 tree void_ftype_void
7535 = build_function_type (void_type_node
, void_list_node
);
7537 /* __sync_lock_test_and_set_si */
7538 tree si_ftype_psi_si
7539 = build_function_type_list (integer_type_node
,
7540 psi_type_node
, integer_type_node
, NULL_TREE
);
7542 /* __sync_lock_test_and_set_di */
7543 tree di_ftype_pdi_di
7544 = build_function_type_list (long_integer_type_node
,
7545 pdi_type_node
, long_integer_type_node
,
7548 /* __sync_lock_release_si */
7550 = build_function_type_list (void_type_node
, psi_type_node
, NULL_TREE
);
7552 /* __sync_lock_release_di */
7554 = build_function_type_list (void_type_node
, pdi_type_node
, NULL_TREE
);
7559 /* The __fpreg type. */
7560 fpreg_type
= make_node (REAL_TYPE
);
7561 /* ??? The back end should know to load/save __fpreg variables using
7562 the ldf.fill and stf.spill instructions. */
7563 TYPE_PRECISION (fpreg_type
) = 80;
7564 layout_type (fpreg_type
);
7565 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
7567 /* The __float80 type. */
7568 float80_type
= make_node (REAL_TYPE
);
7569 TYPE_PRECISION (float80_type
) = 80;
7570 layout_type (float80_type
);
7571 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
7573 /* The __float128 type. */
7576 tree float128_type
= make_node (REAL_TYPE
);
7577 TYPE_PRECISION (float128_type
) = 128;
7578 layout_type (float128_type
);
7579 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
7582 /* Under HPUX, this is a synonym for "long double". */
7583 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
7586 #define def_builtin(name, type, code) \
7587 lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \
7590 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7591 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7592 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7593 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7594 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7595 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7596 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di
,
7597 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7599 def_builtin ("__sync_synchronize", void_ftype_void
,
7600 IA64_BUILTIN_SYNCHRONIZE
);
7602 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
7603 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
7604 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
7605 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
7606 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
7607 IA64_BUILTIN_LOCK_RELEASE_SI
);
7608 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
7609 IA64_BUILTIN_LOCK_RELEASE_DI
);
7611 def_builtin ("__builtin_ia64_bsp",
7612 build_function_type (ptr_type_node
, void_list_node
),
7615 def_builtin ("__builtin_ia64_flushrs",
7616 build_function_type (void_type_node
, void_list_node
),
7617 IA64_BUILTIN_FLUSHRS
);
7619 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
7620 IA64_BUILTIN_FETCH_AND_ADD_SI
);
7621 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
7622 IA64_BUILTIN_FETCH_AND_SUB_SI
);
7623 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
7624 IA64_BUILTIN_FETCH_AND_OR_SI
);
7625 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
7626 IA64_BUILTIN_FETCH_AND_AND_SI
);
7627 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
7628 IA64_BUILTIN_FETCH_AND_XOR_SI
);
7629 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
7630 IA64_BUILTIN_FETCH_AND_NAND_SI
);
7632 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
7633 IA64_BUILTIN_ADD_AND_FETCH_SI
);
7634 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7635 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7636 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7637 IA64_BUILTIN_OR_AND_FETCH_SI
);
7638 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7639 IA64_BUILTIN_AND_AND_FETCH_SI
);
7640 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7641 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7642 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7643 IA64_BUILTIN_NAND_AND_FETCH_SI
);
7645 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7646 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7647 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7648 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7649 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7650 IA64_BUILTIN_FETCH_AND_OR_DI
);
7651 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7652 IA64_BUILTIN_FETCH_AND_AND_DI
);
7653 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7654 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7655 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7656 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7658 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7659 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7660 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7661 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7662 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7663 IA64_BUILTIN_OR_AND_FETCH_DI
);
7664 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7665 IA64_BUILTIN_AND_AND_FETCH_DI
);
7666 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7667 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7668 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7669 IA64_BUILTIN_NAND_AND_FETCH_DI
);
7674 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7682 cmpxchgsz.acq tmp = [ptr], tmp
7683 } while (tmp != ret)
7687 ia64_expand_fetch_and_op (optab binoptab
, enum machine_mode mode
,
7688 tree arglist
, rtx target
)
7690 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
7693 arg0
= TREE_VALUE (arglist
);
7694 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7695 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7696 #ifdef POINTERS_EXTEND_UNSIGNED
7697 if (GET_MODE(mem
) != Pmode
)
7698 mem
= convert_memory_address (Pmode
, mem
);
7700 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7702 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7703 MEM_VOLATILE_P (mem
) = 1;
7705 if (target
&& register_operand (target
, mode
))
7708 ret
= gen_reg_rtx (mode
);
7710 emit_insn (gen_mf ());
7712 /* Special case for fetchadd instructions. */
7713 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7716 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7718 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
7723 tmp
= gen_reg_rtx (mode
);
7724 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7725 ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
7726 emit_move_insn (tmp
, mem
);
7728 label
= gen_label_rtx ();
7730 emit_move_insn (ret
, tmp
);
7731 convert_move (ccv
, tmp
, /*unsignedp=*/1);
7733 /* Perform the specific operation. Special case NAND by noticing
7734 one_cmpl_optab instead. */
7735 if (binoptab
== one_cmpl_optab
)
7737 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7738 binoptab
= and_optab
;
7740 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7743 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7745 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
7748 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, label
);
7753 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7760 ret = tmp <op> value;
7761 cmpxchgsz.acq tmp = [ptr], ret
7762 } while (tmp != old)
7766 ia64_expand_op_and_fetch (optab binoptab
, enum machine_mode mode
,
7767 tree arglist
, rtx target
)
7769 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
7772 arg0
= TREE_VALUE (arglist
);
7773 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7774 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7775 #ifdef POINTERS_EXTEND_UNSIGNED
7776 if (GET_MODE(mem
) != Pmode
)
7777 mem
= convert_memory_address (Pmode
, mem
);
7780 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7782 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7783 MEM_VOLATILE_P (mem
) = 1;
7785 if (target
&& ! register_operand (target
, mode
))
7788 emit_insn (gen_mf ());
7789 tmp
= gen_reg_rtx (mode
);
7790 old
= gen_reg_rtx (mode
);
7791 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7792 ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
7794 emit_move_insn (tmp
, mem
);
7796 label
= gen_label_rtx ();
7798 emit_move_insn (old
, tmp
);
7799 convert_move (ccv
, tmp
, /*unsignedp=*/1);
7801 /* Perform the specific operation. Special case NAND by noticing
7802 one_cmpl_optab instead. */
7803 if (binoptab
== one_cmpl_optab
)
7805 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7806 binoptab
= and_optab
;
7808 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
7811 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
7813 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
7816 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, label
);
7821 /* Expand val_ and bool_compare_and_swap. For val_ we want:
7825 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7828 For bool_ it's the same except return ret == oldval.
7832 ia64_expand_compare_and_swap (enum machine_mode rmode
, enum machine_mode mode
,
7833 int boolp
, tree arglist
, rtx target
)
7835 tree arg0
, arg1
, arg2
;
7836 rtx mem
, old
, new, ccv
, tmp
, insn
;
7838 arg0
= TREE_VALUE (arglist
);
7839 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7840 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
7841 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
7842 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7843 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
7845 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
7846 MEM_VOLATILE_P (mem
) = 1;
7848 if (GET_MODE (old
) != mode
)
7849 old
= convert_to_mode (mode
, old
, /*unsignedp=*/1);
7850 if (GET_MODE (new) != mode
)
7851 new = convert_to_mode (mode
, new, /*unsignedp=*/1);
7853 if (! register_operand (old
, mode
))
7854 old
= copy_to_mode_reg (mode
, old
);
7855 if (! register_operand (new, mode
))
7856 new = copy_to_mode_reg (mode
, new);
7858 if (! boolp
&& target
&& register_operand (target
, mode
))
7861 tmp
= gen_reg_rtx (mode
);
7863 ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
7864 convert_move (ccv
, old
, /*unsignedp=*/1);
7865 emit_insn (gen_mf ());
7867 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
7869 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
7875 target
= gen_reg_rtx (rmode
);
7876 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
7882 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
7885 ia64_expand_lock_test_and_set (enum machine_mode mode
, tree arglist
,
7889 rtx mem
, new, ret
, insn
;
7891 arg0
= TREE_VALUE (arglist
);
7892 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7893 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
7894 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
7896 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
7897 MEM_VOLATILE_P (mem
) = 1;
7898 if (! register_operand (new, mode
))
7899 new = copy_to_mode_reg (mode
, new);
7901 if (target
&& register_operand (target
, mode
))
7904 ret
= gen_reg_rtx (mode
);
7907 insn
= gen_xchgsi (ret
, mem
, new);
7909 insn
= gen_xchgdi (ret
, mem
, new);
7915 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
7918 ia64_expand_lock_release (enum machine_mode mode
, tree arglist
,
7919 rtx target ATTRIBUTE_UNUSED
)
7924 arg0
= TREE_VALUE (arglist
);
7925 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
7927 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
7928 MEM_VOLATILE_P (mem
) = 1;
7930 emit_move_insn (mem
, const0_rtx
);
7936 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
7937 enum machine_mode mode ATTRIBUTE_UNUSED
,
7938 int ignore ATTRIBUTE_UNUSED
)
7940 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
7941 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
7942 tree arglist
= TREE_OPERAND (exp
, 1);
7943 enum machine_mode rmode
= VOIDmode
;
7947 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
7948 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
7953 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
7954 case IA64_BUILTIN_LOCK_RELEASE_SI
:
7955 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
7956 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
7957 case IA64_BUILTIN_FETCH_AND_OR_SI
:
7958 case IA64_BUILTIN_FETCH_AND_AND_SI
:
7959 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
7960 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
7961 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
7962 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
7963 case IA64_BUILTIN_OR_AND_FETCH_SI
:
7964 case IA64_BUILTIN_AND_AND_FETCH_SI
:
7965 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
7966 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
7970 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
7975 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
7980 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
7981 case IA64_BUILTIN_LOCK_RELEASE_DI
:
7982 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
7983 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
7984 case IA64_BUILTIN_FETCH_AND_OR_DI
:
7985 case IA64_BUILTIN_FETCH_AND_AND_DI
:
7986 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
7987 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
7988 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
7989 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
7990 case IA64_BUILTIN_OR_AND_FETCH_DI
:
7991 case IA64_BUILTIN_AND_AND_FETCH_DI
:
7992 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
7993 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8003 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
8004 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
8005 return ia64_expand_compare_and_swap (rmode
, mode
, 1, arglist
,
8008 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
8009 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
8010 return ia64_expand_compare_and_swap (rmode
, mode
, 0, arglist
,
8013 case IA64_BUILTIN_SYNCHRONIZE
:
8014 emit_insn (gen_mf ());
8017 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
8018 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
8019 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
8021 case IA64_BUILTIN_LOCK_RELEASE_SI
:
8022 case IA64_BUILTIN_LOCK_RELEASE_DI
:
8023 return ia64_expand_lock_release (mode
, arglist
, target
);
8025 case IA64_BUILTIN_BSP
:
8026 if (! target
|| ! register_operand (target
, DImode
))
8027 target
= gen_reg_rtx (DImode
);
8028 emit_insn (gen_bsp_value (target
));
8029 #ifdef POINTERS_EXTEND_UNSIGNED
8030 target
= convert_memory_address (ptr_mode
, target
);
8034 case IA64_BUILTIN_FLUSHRS
:
8035 emit_insn (gen_flushrs ());
8038 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
8039 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
8040 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
8042 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
8043 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
8044 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
8046 case IA64_BUILTIN_FETCH_AND_OR_SI
:
8047 case IA64_BUILTIN_FETCH_AND_OR_DI
:
8048 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
8050 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8051 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8052 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
8054 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8055 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8056 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
8058 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8059 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8060 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
8062 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8063 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8064 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
8066 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8067 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8068 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
8070 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8071 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8072 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
8074 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8075 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8076 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
8078 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8079 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8080 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
8082 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8083 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8084 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);
8093 /* For the HP-UX IA64 aggregate parameters are passed stored in the
8094 most significant bits of the stack slot. */
8097 ia64_hpux_function_arg_padding (enum machine_mode mode
, tree type
)
8099 /* Exception to normal case for structures/unions/etc. */
8101 if (type
&& AGGREGATE_TYPE_P (type
)
8102 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
8105 /* Fall back to the default. */
8106 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
8109 /* Linked list of all external functions that are to be emitted by GCC.
8110 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8111 order to avoid putting out names that are never really used. */
8113 struct extern_func_list
GTY(())
8115 struct extern_func_list
*next
;
8119 static GTY(()) struct extern_func_list
*extern_func_head
;
8122 ia64_hpux_add_extern_decl (tree decl
)
8124 struct extern_func_list
*p
= ggc_alloc (sizeof (struct extern_func_list
));
8127 p
->next
= extern_func_head
;
8128 extern_func_head
= p
;
8131 /* Print out the list of used global functions. */
8134 ia64_hpux_file_end (void)
8136 struct extern_func_list
*p
;
8138 for (p
= extern_func_head
; p
; p
= p
->next
)
8140 tree decl
= p
->decl
;
8141 tree id
= DECL_ASSEMBLER_NAME (decl
);
8146 if (!TREE_ASM_WRITTEN (decl
) && TREE_SYMBOL_REFERENCED (id
))
8148 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
8150 TREE_ASM_WRITTEN (decl
) = 1;
8151 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
8152 fputs (TYPE_ASM_OP
, asm_out_file
);
8153 assemble_name (asm_out_file
, name
);
8154 fprintf (asm_out_file
, "," TYPE_OPERAND_FMT
"\n", "function");
8158 extern_func_head
= 0;
8161 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
8162 modes of word_mode and larger. Rename the TFmode libfuncs using the
8163 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
8164 backward compatibility. */
8167 ia64_init_libfuncs (void)
8169 set_optab_libfunc (sdiv_optab
, SImode
, "__divsi3");
8170 set_optab_libfunc (udiv_optab
, SImode
, "__udivsi3");
8171 set_optab_libfunc (smod_optab
, SImode
, "__modsi3");
8172 set_optab_libfunc (umod_optab
, SImode
, "__umodsi3");
8174 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
8175 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
8176 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
8177 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
8178 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
8180 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
8181 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
8182 set_conv_libfunc (sext_optab
, TFmode
, XFmode
, "_U_Qfcnvff_f80_to_quad");
8183 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
8184 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
8185 set_conv_libfunc (trunc_optab
, XFmode
, TFmode
, "_U_Qfcnvff_quad_to_f80");
8187 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_sgl");
8188 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
8189 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxut_quad_to_sgl");
8190 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxut_quad_to_dbl");
8192 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
8193 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
8196 /* Rename all the TFmode libfuncs using the HPUX conventions. */
8199 ia64_hpux_init_libfuncs (void)
8201 ia64_init_libfuncs ();
8203 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qfmin");
8204 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
8205 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
8207 /* ia64_expand_compare uses this. */
8208 cmptf_libfunc
= init_one_libfunc ("_U_Qfcmp");
8210 /* These should never be used. */
8211 set_optab_libfunc (eq_optab
, TFmode
, 0);
8212 set_optab_libfunc (ne_optab
, TFmode
, 0);
8213 set_optab_libfunc (gt_optab
, TFmode
, 0);
8214 set_optab_libfunc (ge_optab
, TFmode
, 0);
8215 set_optab_libfunc (lt_optab
, TFmode
, 0);
8216 set_optab_libfunc (le_optab
, TFmode
, 0);
8219 /* Rename the division and modulus functions in VMS. */
8222 ia64_vms_init_libfuncs (void)
8224 set_optab_libfunc (sdiv_optab
, SImode
, "OTS$DIV_I");
8225 set_optab_libfunc (sdiv_optab
, DImode
, "OTS$DIV_L");
8226 set_optab_libfunc (udiv_optab
, SImode
, "OTS$DIV_UI");
8227 set_optab_libfunc (udiv_optab
, DImode
, "OTS$DIV_UL");
8228 set_optab_libfunc (smod_optab
, SImode
, "OTS$REM_I");
8229 set_optab_libfunc (smod_optab
, DImode
, "OTS$REM_L");
8230 set_optab_libfunc (umod_optab
, SImode
, "OTS$REM_UI");
8231 set_optab_libfunc (umod_optab
, DImode
, "OTS$REM_UL");
8234 /* Rename the TFmode libfuncs available from soft-fp in glibc using
8235 the HPUX conventions. */
8238 ia64_sysv4_init_libfuncs (void)
8240 ia64_init_libfuncs ();
8242 /* These functions are not part of the HPUX TFmode interface. We
8243 use them instead of _U_Qfcmp, which doesn't work the way we
8245 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
8246 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
8247 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
8248 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
8249 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
8250 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
8252 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8253 glibc doesn't have them. */
8256 /* Switch to the section to which we should output X. The only thing
8257 special we do here is to honor small data. */
8260 ia64_select_rtx_section (enum machine_mode mode
, rtx x
,
8261 unsigned HOST_WIDE_INT align
)
8263 if (GET_MODE_SIZE (mode
) > 0
8264 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
)
8267 default_elf_select_rtx_section (mode
, x
, align
);
8270 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8271 Pretend flag_pic is always set. */
8274 ia64_rwreloc_select_section (tree exp
, int reloc
, unsigned HOST_WIDE_INT align
)
8276 default_elf_select_section_1 (exp
, reloc
, align
, true);
8280 ia64_rwreloc_unique_section (tree decl
, int reloc
)
8282 default_unique_section_1 (decl
, reloc
, true);
8286 ia64_rwreloc_select_rtx_section (enum machine_mode mode
, rtx x
,
8287 unsigned HOST_WIDE_INT align
)
8289 int save_pic
= flag_pic
;
8291 ia64_select_rtx_section (mode
, x
, align
);
8292 flag_pic
= save_pic
;
8296 ia64_rwreloc_section_type_flags (tree decl
, const char *name
, int reloc
)
8298 return default_section_type_flags_1 (decl
, name
, reloc
, true);
8301 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8302 structure type and that the address of that type should be passed
8303 in out0, rather than in r8. */
8306 ia64_struct_retval_addr_is_first_parm_p (tree fntype
)
8308 tree ret_type
= TREE_TYPE (fntype
);
8310 /* The Itanium C++ ABI requires that out0, rather than r8, be used
8311 as the structure return address parameter, if the return value
8312 type has a non-trivial copy constructor or destructor. It is not
8313 clear if this same convention should be used for other
8314 programming languages. Until G++ 3.4, we incorrectly used r8 for
8315 these return values. */
8316 return (abi_version_at_least (2)
8318 && TYPE_MODE (ret_type
) == BLKmode
8319 && TREE_ADDRESSABLE (ret_type
)
8320 && strcmp (lang_hooks
.name
, "GNU C++") == 0);
8323 /* Output the assembler code for a thunk function. THUNK_DECL is the
8324 declaration for the thunk function itself, FUNCTION is the decl for
8325 the target function. DELTA is an immediate constant offset to be
8326 added to THIS. If VCALL_OFFSET is nonzero, the word at
8327 *(*this + vcall_offset) should be added to THIS. */
8330 ia64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
8331 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
8334 rtx
this, insn
, funexp
;
8335 unsigned int this_parmno
;
8336 unsigned int this_regno
;
8338 reload_completed
= 1;
8339 epilogue_completed
= 1;
8341 reset_block_changes ();
8343 /* Set things up as ia64_expand_prologue might. */
8344 last_scratch_gr_reg
= 15;
8346 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
8347 current_frame_info
.spill_cfa_off
= -16;
8348 current_frame_info
.n_input_regs
= 1;
8349 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
8351 /* Mark the end of the (empty) prologue. */
8352 emit_note (NOTE_INSN_PROLOGUE_END
);
8354 /* Figure out whether "this" will be the first parameter (the
8355 typical case) or the second parameter (as happens when the
8356 virtual function returns certain class objects). */
8358 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk
))
8360 this_regno
= IN_REG (this_parmno
);
8361 if (!TARGET_REG_NAMES
)
8362 reg_names
[this_regno
] = ia64_reg_numbers
[this_parmno
];
8364 this = gen_rtx_REG (Pmode
, this_regno
);
8367 rtx tmp
= gen_rtx_REG (ptr_mode
, this_regno
);
8368 REG_POINTER (tmp
) = 1;
8369 if (delta
&& CONST_OK_FOR_I (delta
))
8371 emit_insn (gen_ptr_extend_plus_imm (this, tmp
, GEN_INT (delta
)));
8375 emit_insn (gen_ptr_extend (this, tmp
));
8378 /* Apply the constant offset, if required. */
8381 rtx delta_rtx
= GEN_INT (delta
);
8383 if (!CONST_OK_FOR_I (delta
))
8385 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8386 emit_move_insn (tmp
, delta_rtx
);
8389 emit_insn (gen_adddi3 (this, this, delta_rtx
));
8392 /* Apply the offset from the vtable, if required. */
8395 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
8396 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8400 rtx t
= gen_rtx_REG (ptr_mode
, 2);
8401 REG_POINTER (t
) = 1;
8402 emit_move_insn (t
, gen_rtx_MEM (ptr_mode
, this));
8403 if (CONST_OK_FOR_I (vcall_offset
))
8405 emit_insn (gen_ptr_extend_plus_imm (tmp
, t
,
8410 emit_insn (gen_ptr_extend (tmp
, t
));
8413 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this));
8417 if (!CONST_OK_FOR_J (vcall_offset
))
8419 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
8420 emit_move_insn (tmp2
, vcall_offset_rtx
);
8421 vcall_offset_rtx
= tmp2
;
8423 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
8427 emit_move_insn (gen_rtx_REG (ptr_mode
, 2),
8428 gen_rtx_MEM (ptr_mode
, tmp
));
8430 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
8432 emit_insn (gen_adddi3 (this, this, tmp
));
8435 /* Generate a tail call to the target function. */
8436 if (! TREE_USED (function
))
8438 assemble_external (function
);
8439 TREE_USED (function
) = 1;
8441 funexp
= XEXP (DECL_RTL (function
), 0);
8442 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
8443 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
8444 insn
= get_last_insn ();
8445 SIBLING_CALL_P (insn
) = 1;
8447 /* Code generation for calls relies on splitting. */
8448 reload_completed
= 1;
8449 epilogue_completed
= 1;
8450 try_split (PATTERN (insn
), insn
, 0);
8454 /* Run just enough of rest_of_compilation to get the insns emitted.
8455 There's not really enough bulk here to make other passes such as
8456 instruction scheduling worth while. Note that use_thunk calls
8457 assemble_start_function and assemble_end_function. */
8459 insn_locators_initialize ();
8460 emit_all_insn_group_barriers (NULL
);
8461 insn
= get_insns ();
8462 shorten_branches (insn
);
8463 final_start_function (insn
, file
, 1);
8464 final (insn
, file
, 1, 0);
8465 final_end_function ();
8467 reload_completed
= 0;
8468 epilogue_completed
= 0;
8472 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
8475 ia64_struct_value_rtx (tree fntype
,
8476 int incoming ATTRIBUTE_UNUSED
)
8478 if (fntype
&& ia64_struct_retval_addr_is_first_parm_p (fntype
))
8480 return gen_rtx_REG (Pmode
, GR_REG (8));
8483 #include "gt-ia64.h"