/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
46 #include "sched-int.h"
49 #include "target-def.h"
52 #include "langhooks.h"
53 #include "cfglayout.h"
54 #include "tree-gimple.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;
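/* Illustrative note (not part of the original source): the value tracks the
   immediate width of the add used for the thread-pointer offset -- 14 fits
   the "adds" 14-bit immediate, 22 fits the "addl" 22-bit immediate (the
   default above), and 64 falls back to a full "movl" of the offset.  */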
/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -tune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;
/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;        /* size of the stack frame, not including
                                      the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;     /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;        /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;               /* mask of saved registers.  */
  unsigned int gr_used_mask;       /* mask of registers in use as gr spill
                                      registers or long-term scratches.  */
  int n_spilled;                   /* number of spilled registers.  */
  int reg_fp;                      /* register for fp.  */
  int reg_save_b0;                 /* save register for b0.  */
  int reg_save_pr;                 /* save register for prs.  */
  int reg_save_ar_pfs;             /* save register for ar.pfs.  */
  int reg_save_ar_unat;            /* save register for ar.unat.  */
  int reg_save_ar_lc;              /* save register for ar.lc.  */
  int reg_save_gp;                 /* save register for gp.  */
  int n_input_regs;                /* number of input registers used.  */
  int n_local_regs;                /* number of local registers used.  */
  int n_output_regs;               /* number of output registers used.  */
  int n_rotate_regs;               /* number of rotating registers used.  */

  char need_regstk;                /* true if a .regstk directive needed.  */
  char initialized;                /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
static void ia64_rwreloc_unique_section (tree, int)
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)

static void ia64_hpux_add_extern_decl (tree decl)
static void ia64_hpux_file_end (void)
static void ia64_init_libfuncs (void)
static void ia64_hpux_init_libfuncs (void)
static void ia64_sysv4_init_libfuncs (void)
static void ia64_vms_init_libfuncs (void)

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true

/* ??? ABI doesn't allow us to define this.  */
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true

/* ??? Investigate.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

struct gcc_target targetm = TARGET_INITIALIZER;
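/* Illustrative note (not part of the original source): each #define above
   overrides the corresponding default member that TARGET_INITIALIZER would
   otherwise supply, so e.g. targetm.attribute_table ends up pointing at
   ia64_attribute_table and targetm.sched.adjust_cost at ia64_adjust_cost.  */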
423 ADDR_AREA_NORMAL
, /* normal address area */
424 ADDR_AREA_SMALL
/* addressable by "addl" (-2MB < addr < 2MB) */
428 static GTY(()) tree small_ident1
;
429 static GTY(()) tree small_ident2
;
434 if (small_ident1
== 0)
436 small_ident1
= get_identifier ("small");
437 small_ident2
= get_identifier ("__small__");
441 /* Retrieve the address area that has been chosen for the given decl. */
443 static ia64_addr_area
444 ia64_get_addr_area (tree decl
)
448 model_attr
= lookup_attribute ("model", DECL_ATTRIBUTES (decl
));
454 id
= TREE_VALUE (TREE_VALUE (model_attr
));
455 if (id
== small_ident1
|| id
== small_ident2
)
456 return ADDR_AREA_SMALL
;
458 return ADDR_AREA_NORMAL
;
462 ia64_handle_model_attribute (tree
*node
, tree name
, tree args
, int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
464 ia64_addr_area addr_area
= ADDR_AREA_NORMAL
;
466 tree arg
, decl
= *node
;
469 arg
= TREE_VALUE (args
);
470 if (arg
== small_ident1
|| arg
== small_ident2
)
472 addr_area
= ADDR_AREA_SMALL
;
476 warning ("invalid argument of `%s' attribute",
477 IDENTIFIER_POINTER (name
));
478 *no_add_attrs
= true;
481 switch (TREE_CODE (decl
))
484 if ((DECL_CONTEXT (decl
) && TREE_CODE (DECL_CONTEXT (decl
))
486 && !TREE_STATIC (decl
))
488 error ("%Jan address area attribute cannot be specified for "
489 "local variables", decl
, decl
);
490 *no_add_attrs
= true;
492 area
= ia64_get_addr_area (decl
);
493 if (area
!= ADDR_AREA_NORMAL
&& addr_area
!= area
)
495 error ("%Jaddress area of '%s' conflicts with previous "
496 "declaration", decl
, decl
);
497 *no_add_attrs
= true;
502 error ("%Jaddress area attribute cannot be specified for functions",
504 *no_add_attrs
= true;
508 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
509 *no_add_attrs
= true;
517 ia64_encode_addr_area (tree decl
, rtx symbol
)
521 flags
= SYMBOL_REF_FLAGS (symbol
);
522 switch (ia64_get_addr_area (decl
))
524 case ADDR_AREA_NORMAL
: break;
525 case ADDR_AREA_SMALL
: flags
|= SYMBOL_FLAG_SMALL_ADDR
; break;
528 SYMBOL_REF_FLAGS (symbol
) = flags
;
532 ia64_encode_section_info (tree decl
, rtx rtl
, int first
)
534 default_encode_section_info (decl
, rtl
, first
);
536 /* Careful not to prod global register variables. */
537 if (TREE_CODE (decl
) == VAR_DECL
538 && GET_CODE (DECL_RTL (decl
)) == MEM
539 && GET_CODE (XEXP (DECL_RTL (decl
), 0)) == SYMBOL_REF
540 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
)))
541 ia64_encode_addr_area (decl
, XEXP (rtl
, 0));
544 /* Return 1 if the operands of a move are ok. */
547 ia64_move_ok (rtx dst
, rtx src
)
549 /* If we're under init_recog_no_volatile, we'll not be able to use
550 memory_operand. So check the code directly and don't worry about
551 the validity of the underlying address, which should have been
552 checked elsewhere anyway. */
553 if (GET_CODE (dst
) != MEM
)
555 if (GET_CODE (src
) == MEM
)
557 if (register_operand (src
, VOIDmode
))
560 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
561 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
562 return src
== const0_rtx
;
564 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
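/* Illustrative example (not from the original source): a store of the
   literal 0 to memory is accepted here because IA-64 can encode it directly
   (e.g. "st8 [r14] = r0"), whereas an arbitrary constant such as 42 fails
   the test and must first be forced into a register by ia64_expand_move.  */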
568 addp4_optimize_ok (rtx op1
, rtx op2
)
570 return (basereg_operand (op1
, GET_MODE(op1
)) !=
571 basereg_operand (op2
, GET_MODE(op2
)));
574 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
575 Return the length of the field, or <= 0 on failure. */
578 ia64_depz_field_mask (rtx rop
, rtx rshift
)
580 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
581 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
583 /* Get rid of the zero bits we're shifting in. */
586 /* We must now have a solid block of 1's at bit 0. */
587 return exact_log2 (op
+ 1);
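/* Worked example (illustrative, not from the original source): for
   rop = 0x0ff0 and rshift = 4, shifting the mask right by 4 leaves
   op = 0xff; exact_log2 (0xff + 1) = 8, so dep.z can deposit an 8-bit
   field at bit position 4.  */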
590 /* Expand a symbolic constant load. */
593 ia64_expand_load_address (rtx dest
, rtx src
)
595 if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (src
))
597 if (GET_CODE (dest
) != REG
)
600 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
601 having to pointer-extend the value afterward. Other forms of address
602 computation below are also more natural to compute as 64-bit quantities.
603 If we've been given an SImode destination register, change it. */
604 if (GET_MODE (dest
) != Pmode
)
605 dest
= gen_rtx_REG (Pmode
, REGNO (dest
));
607 if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_SMALL_ADDR_P (src
))
609 emit_insn (gen_rtx_SET (VOIDmode
, dest
, src
));
612 else if (TARGET_AUTO_PIC
)
614 emit_insn (gen_load_gprel64 (dest
, src
));
617 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
619 emit_insn (gen_load_fptr (dest
, src
));
622 else if (sdata_symbolic_operand (src
, VOIDmode
))
624 emit_insn (gen_load_gprel (dest
, src
));
628 if (GET_CODE (src
) == CONST
629 && GET_CODE (XEXP (src
, 0)) == PLUS
630 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
631 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x3fff) != 0)
633 rtx sym
= XEXP (XEXP (src
, 0), 0);
634 HOST_WIDE_INT ofs
, hi
, lo
;
636 /* Split the offset into a sign extended 14-bit low part
637 and a complementary high part. */
638 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
639 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
642 ia64_expand_load_address (dest
, plus_constant (sym
, hi
));
643 emit_insn (gen_adddi3 (dest
, dest
, GEN_INT (lo
)));
649 tmp
= gen_rtx_HIGH (Pmode
, src
);
650 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
651 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
653 tmp
= gen_rtx_LO_SUM (GET_MODE (dest
), dest
, src
);
654 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
658 static GTY(()) rtx gen_tls_tga
;
660 gen_tls_get_addr (void)
663 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
667 static GTY(()) rtx thread_pointer_rtx
;
669 gen_thread_pointer (void)
671 if (!thread_pointer_rtx
)
672 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
673 return thread_pointer_rtx
;
677 ia64_expand_tls_address (enum tls_model tls_kind
, rtx op0
, rtx op1
)
679 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
684 case TLS_MODEL_GLOBAL_DYNAMIC
:
687 tga_op1
= gen_reg_rtx (Pmode
);
688 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
689 tga_op1
= gen_const_mem (Pmode
, tga_op1
);
691 tga_op2
= gen_reg_rtx (Pmode
);
692 emit_insn (gen_load_ltoff_dtprel (tga_op2
, op1
));
693 tga_op2
= gen_const_mem (Pmode
, tga_op2
);
695 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
696 LCT_CONST
, Pmode
, 2, tga_op1
,
697 Pmode
, tga_op2
, Pmode
);
699 insns
= get_insns ();
702 if (GET_MODE (op0
) != Pmode
)
704 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
707 case TLS_MODEL_LOCAL_DYNAMIC
:
708 /* ??? This isn't the completely proper way to do local-dynamic
709 If the call to __tls_get_addr is used only by a single symbol,
710 then we should (somehow) move the dtprel to the second arg
711 to avoid the extra add. */
714 tga_op1
= gen_reg_rtx (Pmode
);
715 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
716 tga_op1
= gen_const_mem (Pmode
, tga_op1
);
718 tga_op2
= const0_rtx
;
720 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
721 LCT_CONST
, Pmode
, 2, tga_op1
,
722 Pmode
, tga_op2
, Pmode
);
724 insns
= get_insns ();
727 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
729 tmp
= gen_reg_rtx (Pmode
);
730 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
732 if (!register_operand (op0
, Pmode
))
733 op0
= gen_reg_rtx (Pmode
);
736 emit_insn (gen_load_dtprel (op0
, op1
));
737 emit_insn (gen_adddi3 (op0
, tmp
, op0
));
740 emit_insn (gen_add_dtprel (op0
, tmp
, op1
));
743 case TLS_MODEL_INITIAL_EXEC
:
744 tmp
= gen_reg_rtx (Pmode
);
745 emit_insn (gen_load_ltoff_tprel (tmp
, op1
));
746 tmp
= gen_const_mem (Pmode
, tmp
);
747 tmp
= force_reg (Pmode
, tmp
);
749 if (!register_operand (op0
, Pmode
))
750 op0
= gen_reg_rtx (Pmode
);
751 emit_insn (gen_adddi3 (op0
, tmp
, gen_thread_pointer ()));
754 case TLS_MODEL_LOCAL_EXEC
:
755 if (!register_operand (op0
, Pmode
))
756 op0
= gen_reg_rtx (Pmode
);
759 emit_insn (gen_load_tprel (op0
, op1
));
760 emit_insn (gen_adddi3 (op0
, gen_thread_pointer (), op0
));
763 emit_insn (gen_add_tprel (op0
, gen_thread_pointer (), op1
));
772 if (GET_MODE (orig_op0
) == Pmode
)
774 return gen_lowpart (GET_MODE (orig_op0
), op0
);
778 ia64_expand_move (rtx op0
, rtx op1
)
780 enum machine_mode mode
= GET_MODE (op0
);
782 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
783 op1
= force_reg (mode
, op1
);
785 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
787 enum tls_model tls_kind
;
788 if (GET_CODE (op1
) == SYMBOL_REF
789 && (tls_kind
= SYMBOL_REF_TLS_MODEL (op1
)))
790 return ia64_expand_tls_address (tls_kind
, op0
, op1
);
792 if (!TARGET_NO_PIC
&& reload_completed
)
794 ia64_expand_load_address (op0
, op1
);
802 /* Split a move from OP1 to OP0 conditional on COND. */
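/* Illustrative note (not from the original source): each move emitted for
   OP0 <- OP1 below is rewritten as (cond_exec COND (set OP0 OP1)), i.e. a
   predicated instruction such as "(p6) mov r8 = r9" in the final assembly.  */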
805 ia64_emit_cond_move (rtx op0
, rtx op1
, rtx cond
)
807 rtx insn
, first
= get_last_insn ();
809 emit_move_insn (op0
, op1
);
811 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
813 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
817 /* Split a post-reload TImode or TFmode reference into two DImode
818 components. This is made extra difficult by the fact that we do
819 not get any scratch registers to work with, because reload cannot
820 be prevented from giving us a scratch that overlaps the register
821 pair involved. So instead, when addressing memory, we tweak the
822 pointer register up and back down with POST_INCs. Or up and not
823 back down when we can get away with it.
825 REVERSED is true when the loads must be done in reversed order
826 (high word first) for correctness. DEAD is true when the pointer
827 dies with the second insn we generate and therefore the second
828 address must not carry a postmodify.
830 May return an insn which is to be emitted after the moves. */
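/* Illustrative sketch (not from the original source): a TImode load from
   (mem:TI (reg r14)) is typically split into
     (set lo (mem:DI (post_inc r14)))   ; low word, r14 += 8
     (set hi (mem:DI (post_dec r14)))   ; high word, r14 restored
   with the POST_DEC omitted when DEAD says the pointer is not needed
   afterward.  */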
833 ia64_split_tmode (rtx out
[2], rtx in
, bool reversed
, bool dead
)
837 switch (GET_CODE (in
))
840 out
[reversed
] = gen_rtx_REG (DImode
, REGNO (in
));
841 out
[!reversed
] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
846 /* Cannot occur reversed. */
847 if (reversed
) abort ();
849 if (GET_MODE (in
) != TFmode
)
850 split_double (in
, &out
[0], &out
[1]);
852 /* split_double does not understand how to split a TFmode
853 quantity into a pair of DImode constants. */
856 unsigned HOST_WIDE_INT p
[2];
857 long l
[4]; /* TFmode is 128 bits */
859 REAL_VALUE_FROM_CONST_DOUBLE (r
, in
);
860 real_to_target (l
, &r
, TFmode
);
862 if (FLOAT_WORDS_BIG_ENDIAN
)
864 p
[0] = (((unsigned HOST_WIDE_INT
) l
[0]) << 32) + l
[1];
865 p
[1] = (((unsigned HOST_WIDE_INT
) l
[2]) << 32) + l
[3];
869 p
[0] = (((unsigned HOST_WIDE_INT
) l
[3]) << 32) + l
[2];
870 p
[1] = (((unsigned HOST_WIDE_INT
) l
[1]) << 32) + l
[0];
872 out
[0] = GEN_INT (p
[0]);
873 out
[1] = GEN_INT (p
[1]);
879 rtx base
= XEXP (in
, 0);
882 switch (GET_CODE (base
))
887 out
[0] = adjust_automodify_address
888 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
889 out
[1] = adjust_automodify_address
890 (in
, DImode
, dead
? 0 : gen_rtx_POST_DEC (Pmode
, base
), 8);
894 /* Reversal requires a pre-increment, which can only
895 be done as a separate insn. */
896 emit_insn (gen_adddi3 (base
, base
, GEN_INT (8)));
897 out
[0] = adjust_automodify_address
898 (in
, DImode
, gen_rtx_POST_DEC (Pmode
, base
), 8);
899 out
[1] = adjust_address (in
, DImode
, 0);
904 if (reversed
|| dead
) abort ();
905 /* Just do the increment in two steps. */
906 out
[0] = adjust_automodify_address (in
, DImode
, 0, 0);
907 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
911 if (reversed
|| dead
) abort ();
912 /* Add 8, subtract 24. */
913 base
= XEXP (base
, 0);
914 out
[0] = adjust_automodify_address
915 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
916 out
[1] = adjust_automodify_address
918 gen_rtx_POST_MODIFY (Pmode
, base
, plus_constant (base
, -24)),
923 if (reversed
|| dead
) abort ();
924 /* Extract and adjust the modification. This case is
925 trickier than the others, because we might have an
926 index register, or we might have a combined offset that
927 doesn't fit a signed 9-bit displacement field. We can
928 assume the incoming expression is already legitimate. */
929 offset
= XEXP (base
, 1);
930 base
= XEXP (base
, 0);
932 out
[0] = adjust_automodify_address
933 (in
, DImode
, gen_rtx_POST_INC (Pmode
, base
), 0);
935 if (GET_CODE (XEXP (offset
, 1)) == REG
)
937 /* Can't adjust the postmodify to match. Emit the
938 original, then a separate addition insn. */
939 out
[1] = adjust_automodify_address (in
, DImode
, 0, 8);
940 fixup
= gen_adddi3 (base
, base
, GEN_INT (-8));
942 else if (GET_CODE (XEXP (offset
, 1)) != CONST_INT
)
944 else if (INTVAL (XEXP (offset
, 1)) < -256 + 8)
946 /* Again the postmodify cannot be made to match, but
947 in this case it's more efficient to get rid of the
948 postmodify entirely and fix up with an add insn. */
949 out
[1] = adjust_automodify_address (in
, DImode
, base
, 8);
950 fixup
= gen_adddi3 (base
, base
,
951 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8));
955 /* Combined offset still fits in the displacement field.
956 (We cannot overflow it at the high end.) */
957 out
[1] = adjust_automodify_address
959 gen_rtx_POST_MODIFY (Pmode
, base
,
960 gen_rtx_PLUS (Pmode
, base
,
961 GEN_INT (INTVAL (XEXP (offset
, 1)) - 8))),
979 /* Split a TImode or TFmode move instruction after reload.
980 This is used by *movtf_internal and *movti_internal. */
982 ia64_split_tmode_move (rtx operands
[])
984 rtx in
[2], out
[2], insn
;
987 bool reversed
= false;
989 /* It is possible for reload to decide to overwrite a pointer with
990 the value it points to. In that case we have to do the loads in
991 the appropriate order so that the pointer is not destroyed too
992 early. Also we must not generate a postmodify for that second
993 load, or rws_access_regno will abort. */
994 if (GET_CODE (operands
[1]) == MEM
995 && reg_overlap_mentioned_p (operands
[0], operands
[1]))
997 rtx base
= XEXP (operands
[1], 0);
998 while (GET_CODE (base
) != REG
)
999 base
= XEXP (base
, 0);
1001 if (REGNO (base
) == REGNO (operands
[0]))
1005 /* Another reason to do the moves in reversed order is if the first
1006 element of the target register pair is also the second element of
1007 the source register pair. */
1008 if (GET_CODE (operands
[0]) == REG
&& GET_CODE (operands
[1]) == REG
1009 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
1012 fixup
[0] = ia64_split_tmode (in
, operands
[1], reversed
, dead
);
1013 fixup
[1] = ia64_split_tmode (out
, operands
[0], reversed
, dead
);
1015 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1016 if (GET_CODE (EXP) == MEM \
1017 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1018 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1019 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1020 REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \
1021 XEXP (XEXP (EXP, 0), 0), \
1024 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[0], in
[0]));
1025 MAYBE_ADD_REG_INC_NOTE (insn
, in
[0]);
1026 MAYBE_ADD_REG_INC_NOTE (insn
, out
[0]);
1028 insn
= emit_insn (gen_rtx_SET (VOIDmode
, out
[1], in
[1]));
1029 MAYBE_ADD_REG_INC_NOTE (insn
, in
[1]);
1030 MAYBE_ADD_REG_INC_NOTE (insn
, out
[1]);
1033 emit_insn (fixup
[0]);
1035 emit_insn (fixup
[1]);
1037 #undef MAYBE_ADD_REG_INC_NOTE
1040 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1041 through memory plus an extra GR scratch register. Except that you can
1042 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1043 SECONDARY_RELOAD_CLASS, but not both.
1045 We got into problems in the first place by allowing a construct like
1046 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1047 This solution attempts to prevent this situation from occurring. When
1048 we see something like the above, we spill the inner register to memory. */
1051 spill_xfmode_operand (rtx in
, int force
)
1053 if (GET_CODE (in
) == SUBREG
1054 && GET_MODE (SUBREG_REG (in
)) == TImode
1055 && GET_CODE (SUBREG_REG (in
)) == REG
)
1057 rtx memt
= assign_stack_temp (TImode
, 16, 0);
1058 emit_move_insn (memt
, SUBREG_REG (in
));
1059 return adjust_address (memt
, XFmode
, 0);
1061 else if (force
&& GET_CODE (in
) == REG
)
1063 rtx memx
= assign_stack_temp (XFmode
, 16, 0);
1064 emit_move_insn (memx
, in
);
1071 /* Emit comparison instruction if necessary, returning the expression
1072 that holds the compare result in the proper mode. */
1074 static GTY(()) rtx cmptf_libfunc
;
1077 ia64_expand_compare (enum rtx_code code
, enum machine_mode mode
)
1079 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1082 /* If we have a BImode input, then we already have a compare result, and
1083 do not need to emit another comparison. */
1084 if (GET_MODE (op0
) == BImode
)
1086 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1091 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1092 magic number as its third argument, that indicates what to do.
1093 The return value is an integer to be compared against zero. */
1094 else if (GET_MODE (op0
) == TFmode
)
1097 QCMP_INV
= 1, /* Raise FP_INVALID on SNaN as a side effect. */
1103 enum rtx_code ncode
;
1105 if (!cmptf_libfunc
|| GET_MODE (op1
) != TFmode
)
1109 /* 1 = equal, 0 = not equal. Equality operators do
1110 not raise FP_INVALID when given an SNaN operand. */
1111 case EQ
: magic
= QCMP_EQ
; ncode
= NE
; break;
1112 case NE
: magic
= QCMP_EQ
; ncode
= EQ
; break;
1113 /* isunordered() from C99. */
1114 case UNORDERED
: magic
= QCMP_UNORD
; ncode
= NE
; break;
1115 case ORDERED
: magic
= QCMP_UNORD
; ncode
= EQ
; break;
1116 /* Relational operators raise FP_INVALID when given
1118 case LT
: magic
= QCMP_LT
|QCMP_INV
; ncode
= NE
; break;
1119 case LE
: magic
= QCMP_LT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
1120 case GT
: magic
= QCMP_GT
|QCMP_INV
; ncode
= NE
; break;
1121 case GE
: magic
= QCMP_GT
|QCMP_EQ
|QCMP_INV
; ncode
= NE
; break;
/* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
   Expanders for buneq etc. would have to be added to ia64.md
   for this to be useful.  */
1130 ret
= emit_library_call_value (cmptf_libfunc
, 0, LCT_CONST
, DImode
, 3,
1131 op0
, TFmode
, op1
, TFmode
,
1132 GEN_INT (magic
), DImode
);
1133 cmp
= gen_reg_rtx (BImode
);
1134 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1135 gen_rtx_fmt_ee (ncode
, BImode
,
1138 insns
= get_insns ();
1141 emit_libcall_block (insns
, cmp
, cmp
,
1142 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
));
1147 cmp
= gen_reg_rtx (BImode
);
1148 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1149 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1153 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
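/* Illustrative note (not from the original source): for an ordinary DImode
   "a < b" this expands to a single "cmp.lt p6, p7 = a, b" setting a BImode
   predicate, and the RTX returned above tests that predicate against zero
   so the branch or scc expander can use it directly.  */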
1156 /* Emit the appropriate sequence for a call. */
1159 ia64_expand_call (rtx retval
, rtx addr
, rtx nextarg ATTRIBUTE_UNUSED
,
1164 addr
= XEXP (addr
, 0);
1165 addr
= convert_memory_address (DImode
, addr
);
1166 b0
= gen_rtx_REG (DImode
, R_BR (0));
1168 /* ??? Should do this for functions known to bind local too. */
1169 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1172 insn
= gen_sibcall_nogp (addr
);
1174 insn
= gen_call_nogp (addr
, b0
);
1176 insn
= gen_call_value_nogp (retval
, addr
, b0
);
1177 insn
= emit_call_insn (insn
);
1182 insn
= gen_sibcall_gp (addr
);
1184 insn
= gen_call_gp (addr
, b0
);
1186 insn
= gen_call_value_gp (retval
, addr
, b0
);
1187 insn
= emit_call_insn (insn
);
1189 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
1193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
1197 ia64_reload_gp (void)
1201 if (current_frame_info
.reg_save_gp
)
1202 tmp
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_gp
);
1205 HOST_WIDE_INT offset
;
1207 offset
= (current_frame_info
.spill_cfa_off
1208 + current_frame_info
.spill_size
);
1209 if (frame_pointer_needed
)
1211 tmp
= hard_frame_pointer_rtx
;
1216 tmp
= stack_pointer_rtx
;
1217 offset
= current_frame_info
.total_size
- offset
;
1220 if (CONST_OK_FOR_I (offset
))
1221 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1222 tmp
, GEN_INT (offset
)));
1225 emit_move_insn (pic_offset_table_rtx
, GEN_INT (offset
));
1226 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1227 pic_offset_table_rtx
, tmp
));
1230 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
1233 emit_move_insn (pic_offset_table_rtx
, tmp
);
1237 ia64_split_call (rtx retval
, rtx addr
, rtx retaddr
, rtx scratch_r
,
1238 rtx scratch_b
, int noreturn_p
, int sibcall_p
)
1241 bool is_desc
= false;
1243 /* If we find we're calling through a register, then we're actually
1244 calling through a descriptor, so load up the values. */
1245 if (REG_P (addr
) && GR_REGNO_P (REGNO (addr
)))
1250 /* ??? We are currently constrained to *not* use peep2, because
1251 we can legitimately change the global lifetime of the GP
1252 (in the form of killing where previously live). This is
1253 because a call through a descriptor doesn't use the previous
1254 value of the GP, while a direct call does, and we do not
1255 commit to either form until the split here.
1257 That said, this means that we lack precise life info for
1258 whether ADDR is dead after this call. This is not terribly
1259 important, since we can fix things up essentially for free
1260 with the POST_DEC below, but it's nice to not use it when we
1261 can immediately tell it's not necessary. */
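/* Illustrative note (not from the original source): an IA-64 function
   descriptor is a two-word object { entry-point, gp }, so the indirect call
   below loads the code address from [addr] into SCRATCH_B and the callee's
   gp from [addr + 8], using the POST_INC/POST_DEC pair to step between the
   two words without an extra scratch register.  */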
1262 addr_dead_p
= ((noreturn_p
|| sibcall_p
1263 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
1265 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
1267 /* Load the code address into scratch_b. */
1268 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
1269 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1270 emit_move_insn (scratch_r
, tmp
);
1271 emit_move_insn (scratch_b
, scratch_r
);
1273 /* Load the GP address. If ADDR is not dead here, then we must
1274 revert the change made above via the POST_INCREMENT. */
1276 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
1279 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1280 emit_move_insn (pic_offset_table_rtx
, tmp
);
1287 insn
= gen_sibcall_nogp (addr
);
1289 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
1291 insn
= gen_call_nogp (addr
, retaddr
);
1292 emit_call_insn (insn
);
1294 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
1298 /* Begin the assembly file. */
1301 ia64_file_start (void)
1303 default_file_start ();
1304 emit_safe_across_calls ();
1308 emit_safe_across_calls (void)
1310 unsigned int rs
, re
;
1317 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1321 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1325 fputs ("\t.pred.safe_across_calls ", asm_out_file
);
1329 fputc (',', asm_out_file
);
1331 fprintf (asm_out_file
, "p%u", rs
);
1333 fprintf (asm_out_file
, "p%u-p%u", rs
, re
- 1);
1337 fputc ('\n', asm_out_file
);
1340 /* Helper function for ia64_compute_frame_size: find an appropriate general
1341 register to spill some special register to. SPECIAL_SPILL_MASK contains
1342 bits in GR0 to GR31 that have already been allocated by this routine.
1343 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1346 find_gr_spill (int try_locals
)
1350 /* If this is a leaf function, first try an otherwise unused
1351 call-clobbered register. */
1352 if (current_function_is_leaf
)
1354 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1355 if (! regs_ever_live
[regno
]
1356 && call_used_regs
[regno
]
1357 && ! fixed_regs
[regno
]
1358 && ! global_regs
[regno
]
1359 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1361 current_frame_info
.gr_used_mask
|= 1 << regno
;
1368 regno
= current_frame_info
.n_local_regs
;
1369 /* If there is a frame pointer, then we can't use loc79, because
1370 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1371 reg_name switching code in ia64_expand_prologue. */
1372 if (regno
< (80 - frame_pointer_needed
))
1374 current_frame_info
.n_local_regs
= regno
+ 1;
1375 return LOC_REG (0) + regno
;
1379 /* Failed to find a general register to spill to. Must use stack. */
1383 /* In order to make for nice schedules, we try to allocate every temporary
1384 to a different register. We must of course stay away from call-saved,
1385 fixed, and global registers. We must also stay away from registers
1386 allocated in current_frame_info.gr_used_mask, since those include regs
1387 used all through the prologue.
1389 Any register allocated here must be used immediately. The idea is to
1390 aid scheduling, not to solve data flow problems. */
1392 static int last_scratch_gr_reg
;
1395 next_scratch_gr_reg (void)
1399 for (i
= 0; i
< 32; ++i
)
1401 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1402 if (call_used_regs
[regno
]
1403 && ! fixed_regs
[regno
]
1404 && ! global_regs
[regno
]
1405 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1407 last_scratch_gr_reg
= regno
;
1412 /* There must be _something_ available. */
1416 /* Helper function for ia64_compute_frame_size, called through
1417 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1420 mark_reg_gr_used_mask (rtx reg
, void *data ATTRIBUTE_UNUSED
)
1422 unsigned int regno
= REGNO (reg
);
1425 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1426 for (i
= 0; i
< n
; ++i
)
1427 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1431 /* Returns the number of bytes offset between the frame pointer and the stack
1432 pointer for the current function. SIZE is the number of bytes of space
1433 needed for local variables. */
1436 ia64_compute_frame_size (HOST_WIDE_INT size
)
1438 HOST_WIDE_INT total_size
;
1439 HOST_WIDE_INT spill_size
= 0;
1440 HOST_WIDE_INT extra_spill_size
= 0;
1441 HOST_WIDE_INT pretend_args_size
;
1444 int spilled_gr_p
= 0;
1445 int spilled_fr_p
= 0;
1449 if (current_frame_info
.initialized
)
1452 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1453 CLEAR_HARD_REG_SET (mask
);
1455 /* Don't allocate scratches to the return register. */
1456 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1458 /* Don't allocate scratches to the EH scratch registers. */
1459 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1460 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1461 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1462 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1464 /* Find the size of the register stack frame. We have only 80 local
1465 registers, because we reserve 8 for the inputs and 8 for the
1468 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1469 since we'll be adjusting that down later. */
1470 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1471 for (; regno
>= LOC_REG (0); regno
--)
1472 if (regs_ever_live
[regno
])
1474 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1476 /* For functions marked with the syscall_linkage attribute, we must mark
1477 all eight input registers as in use, so that locals aren't visible to
1480 if (cfun
->machine
->n_varargs
> 0
1481 || lookup_attribute ("syscall_linkage",
1482 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1483 current_frame_info
.n_input_regs
= 8;
1486 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1487 if (regs_ever_live
[regno
])
1489 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1492 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1493 if (regs_ever_live
[regno
])
1495 i
= regno
- OUT_REG (0) + 1;
1497 /* When -p profiling, we need one output register for the mcount argument.
1498 Likewise for -a profiling for the bb_init_func argument. For -ax
1499 profiling, we need two output registers for the two bb_init_trace_func
1501 if (current_function_profile
)
1503 current_frame_info
.n_output_regs
= i
;
1505 /* ??? No rotating register support yet. */
1506 current_frame_info
.n_rotate_regs
= 0;
1508 /* Discover which registers need spilling, and how much room that
1509 will take. Begin with floating point and general registers,
1510 which will always wind up on the stack. */
1512 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1513 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1515 SET_HARD_REG_BIT (mask
, regno
);
1521 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1522 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1524 SET_HARD_REG_BIT (mask
, regno
);
1530 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1531 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1533 SET_HARD_REG_BIT (mask
, regno
);
1538 /* Now come all special registers that might get saved in other
1539 general registers. */
1541 if (frame_pointer_needed
)
1543 current_frame_info
.reg_fp
= find_gr_spill (1);
1544 /* If we did not get a register, then we take LOC79. This is guaranteed
1545 to be free, even if regs_ever_live is already set, because this is
1546 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1547 as we don't count loc79 above. */
1548 if (current_frame_info
.reg_fp
== 0)
1550 current_frame_info
.reg_fp
= LOC_REG (79);
1551 current_frame_info
.n_local_regs
++;
1555 if (! current_function_is_leaf
)
1557 /* Emit a save of BR0 if we call other functions. Do this even
1558 if this function doesn't return, as EH depends on this to be
1559 able to unwind the stack. */
1560 SET_HARD_REG_BIT (mask
, BR_REG (0));
1562 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1563 if (current_frame_info
.reg_save_b0
== 0)
1569 /* Similarly for ar.pfs. */
1570 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1571 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1572 if (current_frame_info
.reg_save_ar_pfs
== 0)
1574 extra_spill_size
+= 8;
1578 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1579 registers are clobbered, so we fall back to the stack. */
1580 current_frame_info
.reg_save_gp
1581 = (current_function_calls_setjmp
? 0 : find_gr_spill (1));
1582 if (current_frame_info
.reg_save_gp
== 0)
1584 SET_HARD_REG_BIT (mask
, GR_REG (1));
1591 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1593 SET_HARD_REG_BIT (mask
, BR_REG (0));
1598 if (regs_ever_live
[AR_PFS_REGNUM
])
1600 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1601 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1602 if (current_frame_info
.reg_save_ar_pfs
== 0)
1604 extra_spill_size
+= 8;
1610 /* Unwind descriptor hackery: things are most efficient if we allocate
1611 consecutive GR save registers for RP, PFS, FP in that order. However,
1612 it is absolutely critical that FP get the only hard register that's
1613 guaranteed to be free, so we allocated it first. If all three did
1614 happen to be allocated hard regs, and are consecutive, rearrange them
1615 into the preferred order now. */
1616 if (current_frame_info
.reg_fp
!= 0
1617 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1618 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1620 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1621 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1622 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1625 /* See if we need to store the predicate register block. */
1626 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1627 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1629 if (regno
<= PR_REG (63))
1631 SET_HARD_REG_BIT (mask
, PR_REG (0));
1632 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1633 if (current_frame_info
.reg_save_pr
== 0)
1635 extra_spill_size
+= 8;
1639 /* ??? Mark them all as used so that register renaming and such
1640 are free to use them. */
1641 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1642 regs_ever_live
[regno
] = 1;
1645 /* If we're forced to use st8.spill, we're forced to save and restore
1646 ar.unat as well. The check for existing liveness allows inline asm
1647 to touch ar.unat. */
1648 if (spilled_gr_p
|| cfun
->machine
->n_varargs
1649 || regs_ever_live
[AR_UNAT_REGNUM
])
1651 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1652 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1653 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1654 if (current_frame_info
.reg_save_ar_unat
== 0)
1656 extra_spill_size
+= 8;
1661 if (regs_ever_live
[AR_LC_REGNUM
])
1663 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1664 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1665 if (current_frame_info
.reg_save_ar_lc
== 0)
1667 extra_spill_size
+= 8;
1672 /* If we have an odd number of words of pretend arguments written to
1673 the stack, then the FR save area will be unaligned. We round the
1674 size of this area up to keep things 16 byte aligned. */
1676 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1678 pretend_args_size
= current_function_pretend_args_size
;
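/* Worked example (illustrative): with 24 bytes of pretend arguments,
   IA64_STACK_ALIGN rounds the save area up to 32 bytes, so the FR spills
   that follow it stay on a 16-byte boundary.  */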
1680 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1681 + current_function_outgoing_args_size
);
1682 total_size
= IA64_STACK_ALIGN (total_size
);
1684 /* We always use the 16-byte scratch area provided by the caller, but
1685 if we are a leaf function, there's no one to which we need to provide
1687 if (current_function_is_leaf
)
1688 total_size
= MAX (0, total_size
- 16);
1690 current_frame_info
.total_size
= total_size
;
1691 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
1692 current_frame_info
.spill_size
= spill_size
;
1693 current_frame_info
.extra_spill_size
= extra_spill_size
;
1694 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
1695 current_frame_info
.n_spilled
= n_spilled
;
1696 current_frame_info
.initialized
= reload_completed
;
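/* Illustrative summary (not from the original source): total_size above is
   IA64_STACK_ALIGN (spill_size + extra_spill_size + size + pretend_args_size
   + current_function_outgoing_args_size), e.g. 32 + 8 + 40 + 0 + 48 = 128
   bytes, reduced by the caller-provided 16-byte scratch area when the
   function is a leaf.  */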
1699 /* Compute the initial difference between the specified pair of registers. */
1702 ia64_initial_elimination_offset (int from
, int to
)
1704 HOST_WIDE_INT offset
;
1706 ia64_compute_frame_size (get_frame_size ());
1709 case FRAME_POINTER_REGNUM
:
1710 if (to
== HARD_FRAME_POINTER_REGNUM
)
1712 if (current_function_is_leaf
)
1713 offset
= -current_frame_info
.total_size
;
1715 offset
= -(current_frame_info
.total_size
1716 - current_function_outgoing_args_size
- 16);
1718 else if (to
== STACK_POINTER_REGNUM
)
1720 if (current_function_is_leaf
)
1723 offset
= 16 + current_function_outgoing_args_size
;
1729 case ARG_POINTER_REGNUM
:
1730 /* Arguments start above the 16 byte save area, unless stdarg
1731 in which case we store through the 16 byte save area. */
1732 if (to
== HARD_FRAME_POINTER_REGNUM
)
1733 offset
= 16 - current_function_pretend_args_size
;
1734 else if (to
== STACK_POINTER_REGNUM
)
1735 offset
= (current_frame_info
.total_size
1736 + 16 - current_function_pretend_args_size
);
1748 /* If there are more than a trivial number of register spills, we use
1749 two interleaved iterators so that we can get two memory references
1752 In order to simplify things in the prologue and epilogue expanders,
1753 we use helper functions to fix up the memory references after the
1754 fact with the appropriate offsets to a POST_MODIFY memory mode.
1755 The following data structure tracks the state of the two iterators
1756 while insns are being emitted. */
1758 struct spill_fill_data
1760 rtx init_after
; /* point at which to emit initializations */
1761 rtx init_reg
[2]; /* initial base register */
1762 rtx iter_reg
[2]; /* the iterator registers */
1763 rtx
*prev_addr
[2]; /* address of last memory use */
1764 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
1765 HOST_WIDE_INT prev_off
[2]; /* last offset */
1766 int n_iter
; /* number of iterators in use */
1767 int next_iter
; /* next iterator to use */
1768 unsigned int save_gr_used_mask
;
1771 static struct spill_fill_data spill_fill_data
;
1774 setup_spill_pointers (int n_spills
, rtx init_reg
, HOST_WIDE_INT cfa_off
)
1778 spill_fill_data
.init_after
= get_last_insn ();
1779 spill_fill_data
.init_reg
[0] = init_reg
;
1780 spill_fill_data
.init_reg
[1] = init_reg
;
1781 spill_fill_data
.prev_addr
[0] = NULL
;
1782 spill_fill_data
.prev_addr
[1] = NULL
;
1783 spill_fill_data
.prev_insn
[0] = NULL
;
1784 spill_fill_data
.prev_insn
[1] = NULL
;
1785 spill_fill_data
.prev_off
[0] = cfa_off
;
1786 spill_fill_data
.prev_off
[1] = cfa_off
;
1787 spill_fill_data
.next_iter
= 0;
1788 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
1790 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
1791 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
1793 int regno
= next_scratch_gr_reg ();
1794 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
1795 current_frame_info
.gr_used_mask
|= 1 << regno
;
1800 finish_spill_pointers (void)
1802 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
1806 spill_restore_mem (rtx reg
, HOST_WIDE_INT cfa_off
)
1808 int iter
= spill_fill_data
.next_iter
;
1809 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
1810 rtx disp_rtx
= GEN_INT (disp
);
1813 if (spill_fill_data
.prev_addr
[iter
])
1815 if (CONST_OK_FOR_N (disp
))
1817 *spill_fill_data
.prev_addr
[iter
]
1818 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
1819 gen_rtx_PLUS (DImode
,
1820 spill_fill_data
.iter_reg
[iter
],
1822 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
1823 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
1824 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
1828 /* ??? Could use register post_modify for loads. */
1829 if (! CONST_OK_FOR_I (disp
))
1831 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1832 emit_move_insn (tmp
, disp_rtx
);
1835 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1836 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
1839 /* Micro-optimization: if we've created a frame pointer, it's at
1840 CFA 0, which may allow the real iterator to be initialized lower,
1841 slightly increasing parallelism. Also, if there are few saves
1842 it may eliminate the iterator entirely. */
1844 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
1845 && frame_pointer_needed
)
1847 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
1848 set_mem_alias_set (mem
, get_varargs_alias_set ());
1856 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
1857 spill_fill_data
.init_reg
[iter
]);
1862 if (! CONST_OK_FOR_I (disp
))
1864 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
1865 emit_move_insn (tmp
, disp_rtx
);
1869 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
1870 spill_fill_data
.init_reg
[iter
],
1877 /* Careful for being the first insn in a sequence. */
1878 if (spill_fill_data
.init_after
)
1879 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
1882 rtx first
= get_insns ();
1884 insn
= emit_insn_before (seq
, first
);
1886 insn
= emit_insn (seq
);
1888 spill_fill_data
.init_after
= insn
;
1890 /* If DISP is 0, we may or may not have a further adjustment
1891 afterward. If we do, then the load/store insn may be modified
1892 to be a post-modify. If we don't, then this copy may be
1893 eliminated by copyprop_hardreg_forward, which makes this
1894 insn garbage, which runs afoul of the sanity check in
1895 propagate_one_insn. So mark this insn as legal to delete. */
1897 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1901 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
1903 /* ??? Not all of the spills are for varargs, but some of them are.
1904 The rest of the spills belong in an alias set of their own. But
1905 it doesn't actually hurt to include them here. */
1906 set_mem_alias_set (mem
, get_varargs_alias_set ());
1908 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
1909 spill_fill_data
.prev_off
[iter
] = cfa_off
;
1911 if (++iter
>= spill_fill_data
.n_iter
)
1913 spill_fill_data
.next_iter
= iter
;
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
  int iter = spill_fill_data.next_iter;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	  base = hard_frame_pointer_rtx;
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;

	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),

do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
  int iter = spill_fill_data.next_iter;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
  return gen_movdi (dest, src);

gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
  return gen_fr_spill (dest, src);

gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
  return gen_fr_restore (dest, src);
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

   [ varargs spill area ]
   [ fr register spill area ]
   [ br register spill area ]
   [ ar register spill area ]
   [ pr register spill area ]
   [ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
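/* Illustrative sketch (added for exposition, kept under "#if 0" so it does
   not affect the build): the save areas listed above are laid out one after
   another, so each area's starting offset is simply the running sum of the
   sizes of the areas listed before it.  The area names and byte sizes below
   are hypothetical and are not taken from current_frame_info.  */
#if 0
static void
example_save_area_offsets (void)
{
  static const char *const area_names[] =
    { "varargs", "fr", "br", "ar", "pr", "gr" };
  static const int area_sizes[] = { 16, 32, 8, 16, 8, 24 };
  int offset = 0;
  unsigned int i;

  for (i = 0; i < sizeof (area_sizes) / sizeof (area_sizes[0]); i++)
    {
      printf ("%s spill area starts at offset %d\n", area_names[i], offset);
      offset += area_sizes[i];
    }
}
#endif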
ia64_expand_prologue (void)
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
      for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
      epilogue_p = (e != NULL);

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;

      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

  if (current_frame_info.total_size != 0)
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	      = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				   gen_rtx_SET (VOIDmode,
						gen_rtx_PLUS (DImode,

  /* ??? At this point we must generate a magic insn that appears to
     modify the stack pointer, the frame pointer, and all spill
     iterators.  This would allow the most scheduling freedom.  For
     now, just hard stop.  */
  emit_insn (gen_blockage ());
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      if (current_frame_info.reg_save_ar_unat)
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));

    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	    emit_insn (gen_prologue_use (alt_reg));

	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.reg_save_ar_pfs == 0)
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	    emit_insn (gen_prologue_use (alt_reg));

	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);

  if (current_frame_info.reg_save_gp)
      insn = emit_move_insn (gen_rtx_REG (DImode,
					  current_frame_info.reg_save_gp),
			     pic_offset_table_rtx);
      /* We don't know for sure yet if this is actually needed, since
	 we've not split the PIC call patterns.  If all of the calls
	 are indirect, and not followed by any uses of the gp, then
	 this save is dead.  Allow it to go away.  */
	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	    emit_insn (gen_prologue_use (alt_reg));

	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	reg = gen_rtx_REG (XFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);

  if (cfa_off != current_frame_info.spill_cfa_off)

  finish_spill_pointers ();
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue(), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

ia64_expand_epilogue (int sibcall_p)
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());
  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      if (current_frame_info.reg_save_ar_unat != 0)
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);

    ar_unat_save_reg = NULL_RTX;
  if (current_frame_info.reg_save_ar_pfs != 0)
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	reg = gen_rtx_REG (XFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);

  if (cfa_off != current_frame_info.spill_cfa_off)

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
      emit_insn (gen_blockage ());

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
  else if (current_frame_info.total_size)
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       gen_rtx_SET (VOIDmode,
					    gen_rtx_PLUS (DImode,

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));

      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
	 first available call clobbered register.  If there was a frame_pointer
	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
	 so we have to make sure we're using the string "r2" when emitting
	 the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      const0_rtx, const0_rtx,
			      GEN_INT (current_frame_info.n_input_regs),
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

ia64_direct_return (void)
  if (reload_completed && ! frame_pointer_needed)
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
/* Return the magic cookie that we use to hold the return address
   during early compilation.  */

ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);

/* Split this value after reload, now that we know where the return
   address is saved.  */

ia64_split_return_addr_rtx (rtx dest)
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
      if (current_frame_info.reg_save_b0 != 0)
	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;

	  /* Load address into scratch register.  */
	  if (CONST_OK_FOR_I (off))
	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
	      emit_move_insn (dest, GEN_INT (off));
	      emit_insn (gen_adddi3 (dest, src, dest));

	  src = gen_rtx_MEM (Pmode, dest);

    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
ia64_hard_regno_rename_ok (int from, int to)
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);
/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
  if (size == POINTER_SIZE / BITS_PER_UNIT
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
      if (POINTER_SIZE == 32)
	fputs ("\tdata4\t@fptr(", asm_out_file);
	fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);

  return default_assemble_integer (x, size, aligned_p);
/* Emit the function prologue.  */

ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))

  /* Emit the .prologue directive.  */

  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
      grsave = grsave_prev = current_frame_info.reg_save_b0;
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;

  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
/* Emit the .body directive at the scheduled end of the prologue.  */

ia64_output_function_end_prologue (FILE *file)
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))

  fputs ("\t.body\n", file);
/* Emit the function epilogue.  */

ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
  if (current_frame_info.reg_fp)
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;

  if (! TARGET_REG_NAMES)
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];

  current_frame_info.initialized = 0;
ia64_dbx_register_number (int regno)
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
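/* Illustrative sketch (added for exposition, disabled with "#if 0"): the
   numbering above maps the stacked registers onto one dense range starting
   at 32, in the order inputs, then locals, then outputs.  The helper below
   restates that arithmetic with plain integers; the names
   (example_dbx_number, the kind constants, n_inputs, n_locals) are
   hypothetical and exist only for this example.  */
#if 0
enum example_kind { EXAMPLE_IN, EXAMPLE_LOC, EXAMPLE_OUT };

static int
example_dbx_number (enum example_kind kind, int index,
		    int n_inputs, int n_locals)
{
  switch (kind)
    {
    case EXAMPLE_IN:   return 32 + index;
    case EXAMPLE_LOC:  return 32 + n_inputs + index;
    case EXAMPLE_OUT:  return 32 + n_inputs + n_locals + index;
    default:           return -1;
    }
}
/* E.g. with 2 inputs and 3 locals, out0 maps to 32 + 2 + 3 = 37.  */
#endif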
ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
  rtx addr_reg, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	  declared_ia64_trampoline = true;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      "__ia64_trampoline");

  /* Make sure addresses are Pmode even if we are in ILP32 mode.  */
  addr = convert_memory_address (Pmode, addr);
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int * pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
  CUMULATIVE_ARGS next_cum = *cum;

  /* Skip the current argument.  */
  ia64_function_arg_advance (&next_cum, mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
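/* Illustrative sketch (added for exposition, disabled with "#if 0"): the
   varargs setup above reserves one slot for every argument register that
   the named arguments did not consume.  With 8 argument slots and 8-byte
   words (hypothetical stand-ins for MAX_ARGUMENT_SLOTS and UNITS_PER_WORD),
   a function whose named arguments use 3 slots gets a pretend size of
   (8 - 3) * 8 = 40 bytes.  */
#if 0
static int
example_pretend_size (int named_slots, int max_slots, int word_size)
{
  if (named_slots >= max_slots)
    return 0;				/* nothing left to spill */
  return (max_slots - named_slots) * word_size;
}
#endif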
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (tree type, int nested)
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;

    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && TYPE_MODE (type) != TCmode)
	return GET_MODE_INNER (TYPE_MODE (type));

      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && TYPE_MODE (type) != TFmode)
	return TYPE_MODE (type);

      return hfa_element_mode (TREE_TYPE (type), 1);

    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	  if (TREE_CODE (t) != FIELD_DECL)

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	      if (mode != element_mode)
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	      know_element_mode = 1;
	      element_mode = mode;
      return element_mode;

      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
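/* Illustrative sketch (added for exposition, disabled with "#if 0"): the
   walk above succeeds only when every leaf field has the same floating
   point mode.  The helper below restates that rule over a plain array of
   leaf modes; the enum and function names are hypothetical and exist only
   for this example.  */
#if 0
enum example_mode { EX_VOID, EX_SF, EX_DF };

static enum example_mode
example_hfa_mode (const enum example_mode *leaf, int n_leaves)
{
  enum example_mode common = EX_VOID;
  int i;

  for (i = 0; i < n_leaves; i++)
    {
      if (leaf[i] == EX_VOID)
	return EX_VOID;			/* non-FP leaf: not an HFA */
      if (common == EX_VOID)
	common = leaf[i];		/* remember the first leaf mode */
      else if (leaf[i] != common)
	return EX_VOID;			/* mixed modes: not an HFA */
    }
  return common;
}
#endif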
/* Return the number of words required to hold a quantity of TYPE and MODE
   when passed as an argument.  */

ia64_function_arg_words (tree type, enum machine_mode mode)
  if (mode == BLKmode)
    words = int_size_in_bytes (type);
    words = GET_MODE_SIZE (mode);

  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
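/* Illustrative sketch (added for exposition, disabled with "#if 0"): the
   expression above is the usual round-up division.  With 8-byte words
   (a stand-in for UNITS_PER_WORD), a 1-byte argument takes 1 slot, an
   8-byte argument takes 1 slot, and a 20-byte aggregate takes 3 slots.  */
#if 0
static int
example_words_for_bytes (int bytes, int word_size)
{
  return (bytes + word_size - 1) / word_size;	/* round up */
}
#endif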
/* Return the number of registers that should be skipped so the current
   argument (described by TYPE and WORDS) will be properly aligned.

   Integer and float arguments larger than 8 bytes start at the next
   even boundary.  Aggregates larger than 8 bytes start at the next
   even boundary if the aggregate has 16 byte alignment.  Note that
   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   but are still to be aligned in registers.

   ??? The ABI does not specify how to handle aggregates with
   alignment from 9 to 15 bytes, or greater than 16.  We handle them
   all as if they had 16 byte alignment.  Such aggregates can occur
   only if gcc extensions are used.  */

ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
  if ((cum->words & 1) == 0)

      && TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != REAL_TYPE)
    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
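/* Illustrative sketch (added for exposition, disabled with "#if 0"): the
   function above returns either 0 or 1, i.e. how many argument slots must
   be skipped so that a doubleword-aligned argument starts on an even slot.
   The helper below restates that decision with plain integers; the names
   are hypothetical and the alignment predicate is simplified.  */
#if 0
static int
example_arg_offset (int slots_used, int needs_16_byte_alignment)
{
  if ((slots_used & 1) == 0)
    return 0;				/* already on an even slot */
  return needs_16_byte_alignment ? 1 : 0;
}
#endif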
/* Return rtx for register where argument is passed, or zero if it is passed

/* ??? 128-bit quad-precision floats are always passed in general

ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
		   int named, int incoming)
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)

  /* Check for and handle homogeneous FP aggregates.  */
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
	  args_byte_size += hfa_size;

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	  enum machine_mode gr_mode = DImode;
	  unsigned int gr_size;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg

	  gr_size = GET_MODE_SIZE (gr_mode);
	  if (gr_size == UNITS_PER_WORD
	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
	  else if (gr_size > UNITS_PER_WORD)
	    int_regs += gr_size / UNITS_PER_WORD;

      /* If we ended up using just one location, just return that one loc, but
	 change the mode back to the argument mode.  However, we can't do this
	 when hfa_mode is XFmode and mode is TImode.  In that case, we would
	 return a TImode reference to an FP reg, but FP regs can't hold TImode.
	 We need the PARALLEL to make this work.  This can happen for a union
	 containing a single __float80 member.  */
      if (i == 1 && ! (hfa_mode == XFmode && mode == TImode))
	return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (mode == TFmode || mode == TCmode
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
      int byte_size = ((mode == BLKmode)
		       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));

	return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
	 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
	 the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && mode == SFmode)
	return gen_rtx_PARALLEL (mode,
		 gen_rtx_EXPR_LIST (VOIDmode,
		   gen_rtx_REG (DImode, basereg + cum->words + offset),
	return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is no prototype, then FP values go in both FR and GR

      /* See comment above.  */
      enum machine_mode inner_mode =
	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely

ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
				 tree type, int named ATTRIBUTE_UNUSED)
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			   tree type, int named)
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	  args_byte_size += hfa_size;

      cum->fp_regs = fp_regs;

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
	cum->int_regs = cum->words;
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  /* If there is no prototype, then FP values go in both FR and GR

      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
/* Variable sized types are passed by reference.  */
/* ??? At present this is a GCC extension to the IA-64 ABI.  */

ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;

/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */

ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module.  */
  return decl && (*targetm.binds_local_p) (decl);
/* Implement va_arg.  */

ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
  /* Variable sized types are passed by reference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
      tree ptrtype = build_pointer_type (type);
      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
      return build_fold_indirect_ref (addr);

  /* Aggregate arguments with alignment larger than 8 bytes start at
     the next even boundary.  Integer and floating point arguments
     do so if they are larger than 8 bytes, whether or not they are
     also aligned larger than 8 bytes.  */
  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
      tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		      build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      gimplify_and_add (t, pre_p);

  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Return 1 if function return value returned in memory.  Return 0 if it is
   returned in a register.

ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  HOST_WIDE_INT byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = GET_MODE_SIZE (mode);
  if (mode == BLKmode)
      byte_size = int_size_in_bytes (valtype);

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
/* Return rtx for register that holds the function return value.  */

ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

      for (i = 0; offset < byte_size; i++)
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
	return XEXP (loc[0], 0);
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);

      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	  bytesize = int_size_in_bytes (valtype);
	  for (i = 0; offset < bytesize; i++)
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
	      offset += UNITS_PER_WORD;
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));

      return gen_rtx_REG (mode, GR_RET_FIRST);
/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
      fputs ("\tdata8.ua\t@dtprel(", file);
  output_addr_const (file, x);

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    rtx address ATTRIBUTE_UNUSED)
/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for

ia64_print_operand (FILE * file, rtx x, int code)
      /* Handled below.  */

	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);

      switch (GET_CODE (x))
      str = GET_RTX_NAME (GET_CODE (x));

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));

      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names[FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names[FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names[REGNO (x)];

      fputs (reg_names[REGNO (x) + 1], file);

	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	fputs (reg_names[regno], file);

      if (MEM_VOLATILE_P (x))
	fputs (".acq", file);

	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	    else if (GET_CODE (x) == REG)
	      fprintf (file, ", %s", reg_names[REGNO (x)]);

	    value = GET_MODE_SIZE (GET_MODE (x));
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
      if (MEM_VOLATILE_P (x))
	fputs (".rel", file);

      fprintf (file, "%d", exact_log2 (INTVAL (x)));

      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);

      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	      fprintf (file, "0xffffffff");
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);

      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
	output_operand_lossage ("invalid %%r value");

      /* For conditional branches, returns or calls, substitute
	 sptk, dptk, dpnt, or spnt for %s.  */
      x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	  int pred_val = INTVAL (XEXP (x, 0));

	  /* Guess top and bottom 10% statically predicted.  */
	  if (pred_val < REG_BR_PROB_BASE / 50)
	  else if (pred_val < REG_BR_PROB_BASE / 2)
	  else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
      else if (GET_CODE (current_output_insn) == CALL_INSN)
      fputs (which, file);

      x = current_insn_predicate;
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	  fprintf (file, "(%s) ", reg_names[regno]);

      output_operand_lossage ("ia64_print_operand: unknown code");

  switch (GET_CODE (x))
      /* This happens for the spill/restore instructions.  */

      /* ... fall through ...  */
      fputs (reg_names[REGNO (x)], file);

	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names[REGNO (addr)]);

      output_addr_const (file, x);
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
/* ??? This is incomplete.  */

ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
      *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
      if (CONST_OK_FOR_I (INTVAL (x)))
      else if (CONST_OK_FOR_J (INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
	*total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (3);

      /* For multiplies wider than HImode, we have to go to the FPU,
	 which normally involves copies.  Plus there's the latency
	 of the multiply itself, and the latency of the instructions to
	 transfer integer regs to FP regs.  */
      /* ??? Check for FP mode.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
	*total = COSTS_N_INSNS (10);
	*total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (1);

      /* We make divide expensive, so that divide-by-constant will be
	 optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
  if (from == ADDL_REGS)

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
      enum reg_class tmp = to;
      to = from, from = tmp;

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
      if (to != GR_REGS || from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);

      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);

    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register

ia64_secondary_reload_class (enum reg_class class,
			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))

      /* This is needed if a pseudo used as a call_operand gets spilled to a
      if (GET_CODE (x) == MEM)

      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))

      /* This can happen when a paradoxical subreg is an operand to the
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)

      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 current_function_has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  int save_referenced;

  /* GNU as does not need anything here, but the HP linker does need
     something for external functions.  */

  if (TARGET_GNU_AS
      && (!TARGET_HPUX_LD
	  || TREE_CODE (decl) != FUNCTION_DECL
	  || strstr (name, "__builtin_") == name))
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  if (TARGET_HPUX_LD)
    ia64_hpux_add_extern_decl (decl);
  else
    {
      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
	 restore it.  */
      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
      if (TREE_CODE (decl) == FUNCTION_DECL)
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      (*targetm.asm_out.globalize_label) (file, name);
      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
    }
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning ("value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning ("unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning ("unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning ("%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
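/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the loop above splits a comma-separated list of dash ranges in place.
   The standalone program below shows the same splitting strategy with the
   register-name decoding replaced by plain printing; the option value used
   here is just an example.  */
#if 0
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int
main (void)
{
  const char *const_str = "f32-f127,f8-f15";   /* e.g. -mfixed-range=...  */
  char *str = strdup (const_str);
  char *p = str;

  while (1)
    {
      char *dash = strchr (p, '-');
      char *comma;

      if (!dash)
	{
	  fprintf (stderr, "value must have form REG1-REG2\n");
	  break;
	}
      *dash = '\0';                 /* terminate the first register name */

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';              /* terminate the second register name */

      printf ("range: first=%s last=%s\n", p, dash + 1);

      if (!comma)
	break;
      p = comma + 1;                /* continue with the next range */
    }

  free (str);
  return 0;
}
#endif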
static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}
/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options (void)
{
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"itanium", PROCESSOR_ITANIUM},
      {"itanium1", PROCESSOR_ITANIUM},
      {"merced", PROCESSOR_ITANIUM},
      {"itanium2", PROCESSOR_ITANIUM2},
      {"mckinley", PROCESSOR_ITANIUM2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  int i;

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
	  && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
	{
	  warning ("cannot optimize floating point division for both latency and throughput");
	  target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
	}
      else
	{
	  if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
	    target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
	  else
	    target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
	}
    }

  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
	  && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
	{
	  warning ("cannot optimize integer division for both latency and throughput");
	  target_flags &= ~MASK_INLINE_INT_DIV_THR;
	}
      else
	{
	  if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
	    target_flags &= ~MASK_INLINE_INT_DIV_LAT;
	  else
	    target_flags &= ~MASK_INLINE_INT_DIV_THR;
	}
    }

  if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
    {
      if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
	  && (target_flags_explicit & MASK_INLINE_SQRT_THR))
	{
	  warning ("cannot optimize square root for both latency and throughput");
	  target_flags &= ~MASK_INLINE_SQRT_THR;
	}
      else
	{
	  if (target_flags_explicit & MASK_INLINE_SQRT_THR)
	    target_flags &= ~MASK_INLINE_SQRT_LAT;
	  else
	    target_flags &= ~MASK_INLINE_SQRT_THR;
	}
    }

  if (TARGET_INLINE_SQRT_LAT)
    {
      warning ("not yet implemented: latency-optimized inline square root");
      target_flags &= ~MASK_INLINE_SQRT_LAT;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  if (ia64_tls_size_string)
    {
      char *end;
      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
      else
	ia64_tls_size = tmp;
    }

  if (!ia64_tune_string)
    ia64_tune_string = "itanium2";

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
      {
	ia64_tune = processor_alias_table[i].processor;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -tune= switch", ia64_tune_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  */
  ia64_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
}
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
static enum attr_type ia64_safe_type (rtx);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */

struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};
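/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   a minimal standalone version of the WRITE_COUNT/FIRST_PRED state machine
   described in the comment above, with everything except the two fields
   stripped away.  Two writes under complementary predicates (p, p^1) do not
   force a barrier; anything else does.  The struct, helper and predicate
   numbers below are invented for illustration only.  */
#if 0
#include <stdio.h>

struct toy_state { int write_count; int first_pred; };

/* Record a write under predicate PRED (0 = unconditional); return 1 if a
   stop bit would be needed before this write.  */
static int
toy_write (struct toy_state *s, int pred)
{
  int need_barrier = 0;

  switch (s->write_count)
    {
    case 0:                          /* not written yet in this group */
      break;
    case 1:                          /* written once, under s->first_pred */
      if ((s->first_pred ^ 1) != pred)
	need_barrier = 1;            /* not the complementary predicate */
      break;
    default:                         /* already fully written */
      need_barrier = 1;
      break;
    }

  /* Unconditional writes, or a second predicated write, leave the
     register in the "fully written" state.  */
  if (pred == 0 || s->write_count >= 1)
    s->write_count = 2;
  else
    {
      s->write_count = 1;
      s->first_pred = pred;
    }
  return need_barrier;
}

int
main (void)
{
  struct toy_state s = { 0, 0 };
  printf ("%d", toy_write (&s, 6));   /* (p6) write  -> 0 */
  printf ("%d", toy_write (&s, 7));   /* (p7) write  -> 0, p7 == p6^1 */
  printf ("%d\n", toy_write (&s, 6)); /* (p6) write  -> 1, needs stop  */
  return 0;
}
#endif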
/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed_p (rtx);
static int safe_group_barrier_needed_p (rtx);
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags,
	    int pred)
{
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
	abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  rws_sum[regno].written_by_and = flags.is_and;
	  rws_sum[regno].written_by_or = flags.is_or;
	  break;

	default:
	  abort ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  abort ();
	}
    }

  return need_barrier;
}
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
{
  rtx src = SET_SRC (x);

  *pcond = 0;

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      if (SET_DEST (x) == pc_rtx)
	/* X is a conditional branch.  */
	return;
      else
	{
	  int is_complemented = 0;

	  /* X is a conditional move.  */
	  rtx cond = XEXP (src, 0);
	  if (GET_CODE (cond) == EQ)
	    is_complemented = 1;
	  cond = XEXP (cond, 0);
	  if (GET_CODE (cond) != REG
	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	    abort ();
	  *pcond = cond;
	  if (XEXP (src, 1) == SET_DEST (x)
	      || XEXP (src, 2) == SET_DEST (x))
	    {
	      /* X is a conditional move that conditionally writes the
		 destination.  */

	      /* We need another complement in this case.  */
	      if (XEXP (src, 1) == SET_DEST (x))
		is_complemented = ! is_complemented;

	      *ppred = REGNO (cond);
	      if (is_complemented)
		++*ppred;
	    }

	  /* ??? If this is a conditional write to the dest, then this
	     instruction does not actually read one source.  This probably
	     doesn't matter, because that source is also the dest.  */
	  /* ??? Multiple writes to predicate registers are allowed
	     if they are all AND type compares, or if they are all OR
	     type compares.  We do not generate such instructions
	     currently.  */
	}
      /* ... fall through ...  */

    default:
      if (COMPARISON_P (src)
	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
  if (cond)
    need_barrier |= rws_access_reg (cond, flags, 0);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
	abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	abort ();
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids an abort in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      update_set_flags (pat, &new_flags, &pred, &cond);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred,
						     cond);
	    }
	  else if (GET_CODE (pat) == USE
		   || GET_CODE (pat) == CALL
		   || GET_CODE (pat) == ASM_OPERANDS)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
	    abort ();
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:		case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == 1);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  abort ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	default:
	  abort ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    abort ();
	  }
      break;
    }

  return need_barrier;
}
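/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the default case above walks an arbitrary rtx by consulting its
   GET_RTX_FORMAT string, one character per operand.  The standalone
   program below shows the same format-driven traversal on an invented
   expression type; the struct and format letters are illustration only.  */
#if 0
#include <stdio.h>

struct expr
{
  const char *format;          /* e.g. "ee" = two sub-expressions */
  int value;                   /* used when the format letter is 'i' */
  struct expr *op[2];
};

/* Sum every 'i' operand reachable from E, dispatching on the format
   string the way rtx_needs_barrier dispatches on GET_RTX_FORMAT.  */
static int
walk (struct expr *e)
{
  int i, sum = 0;

  for (i = 0; e->format[i]; i++)
    switch (e->format[i])
      {
      case 'i':                /* scalar operand: nothing to recurse into */
	sum += e->value;
	break;
      case 'e':                /* sub-expression: recurse */
	sum += walk (e->op[i]);
	break;
      default:
	break;
      }
  return sum;
}

int
main (void)
{
  struct expr a = { "i", 1, { 0, 0 } };
  struct expr b = { "i", 2, { 0, 0 } };
  struct expr plus = { "ee", 0, { &a, &b } };

  printf ("%d\n", walk (&plus));   /* prints 3 */
  return 0;
}
#endif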
/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

static int
group_barrier_needed_p (rtx insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  if (first_instruction && INSN_P (insn)
      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (rtx insn)
{
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}
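/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   safe_group_barrier_needed_p uses a common "probe without committing"
   pattern -- snapshot the mutable global state, run the stateful query,
   then restore.  The standalone program below shows the same idea on an
   invented counter array; all names here are made up.  */
#if 0
#include <stdio.h>
#include <string.h>

#define N 4
static int counters[N];             /* stands in for rws_sum */

/* A query that, as a side effect, updates the counters.  */
static int
destructive_query (int i)
{
  return ++counters[i] > 1;
}

/* Same query, but with the global state saved and restored around it.  */
static int
safe_query (int i)
{
  int saved[N], result;

  memcpy (saved, counters, sizeof counters);
  result = destructive_query (i);
  memcpy (counters, saved, sizeof counters);
  return result;
}

int
main (void)
{
  destructive_query (2);                            /* counters[2] == 1 */
  printf ("%d %d\n", safe_query (2), counters[2]);  /* prints "1 1" */
  return 0;
}
#endif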
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    last_label);
		  insn = last_label;

		  init_insn_group_barriers ();
		  last_label = 0;
		}
	    }
	}
    }
}
/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed_p (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed_p (insn);
	    }
	}
    }
}
static int errata_find_address_regs (rtx *, void *);
static void errata_emit_nops (rtx);
static void fixup_errata (void);

/* This structure is used to track some details about the previous insns
   groups so we can determine if it may be necessary to insert NOPs to
   workaround hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
			     REGNO (x)))
	return 1;
      return -1;
    }
  return 0;
}
/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */
static void
errata_emit_nops (rtx insn)
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
	  {
	    set = 0;
	    break;
	  }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type  = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (!COMPARISON_P (cond)
	  || !REG_P (XEXP (cond, 0)))
	abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set,
			  REGNO (SET_DEST (set)));
    }

  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);

      memset (last_group, 0, sizeof last_group);
    }
}
/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata (void)
{
  rtx insn;

  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
	continue;

      if (ia64_safe_type (insn) == TYPE_S)
	{
	  group_idx ^= 1;
	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
	}
      else
	errata_emit_nops (insn);
    }
}

/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx last_scheduled_insn;

/* The following variable value is size of the DFA state.  */

static size_t dfa_state_size;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p;

/* The following variable is used to set up the array described above.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following array element values are cycles on which the
   corresponding insn will be issued.  The array is used only for
   Itanium1.  */

static int *clocks;

/* The following array element values are numbers of cycles which should be
   added to improve insn scheduling of MM_insns for Itanium1.  */

static int *add_cycles;
static rtx ia64_single_set (rtx);
static void ia64_emit_insn_before (rtx, rtx);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
    return cost;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because that
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn, link, next, next_tail;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
	  {
	    next = XEXP (link, 0);
	    if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
		 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      break;
	    else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
		      || ia64_safe_itanium_class (next)
		      == ITANIUM_CLASS_FLD)
		     && ia64_ld_address_bypass_p (insn, next))
	      break;
	  }
	insn->call = link != 0;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx insn;

  if (reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      if (SCHED_GROUP_P (insn))
	abort ();
#endif
  last_scheduled_insn = NULL_RTX;
  init_insn_group_barriers ();
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed_p (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx insn = *insnp;
	    if (! safe_group_barrier_needed_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  return 1;
}
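/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the reorder_type == 1 branch above is, in effect, a stable partition of
   the ready array -- insns that would need a stop bit are moved to the
   front while everything else keeps its relative order.  The standalone
   program below runs the same memmove-based loop on plain ints with an
   invented predicate.  */
#if 0
#include <stdio.h>
#include <string.h>

static int
needs_stop (int x)              /* stand-in for safe_group_barrier_needed_p */
{
  return x < 0;
}

int
main (void)
{
  int ready[6] = { 1, -2, 3, -4, 5, 6 };
  int n = 6, deleted = 0;
  int *insnp = ready + n;

  /* Move down everything that satisfies the predicate, preserving
     relative order, exactly as the scheduler hook does with rtxen.  */
  while (insnp-- > ready + deleted)
    while (insnp >= ready + deleted)
      {
	int insn = *insnp;
	if (! needs_stop (insn))
	  break;
	memmove (ready + 1, ready, (insnp - ready) * sizeof (int));
	*ready = insn;
	deleted++;
      }

  for (insnp = ready; insnp < ready + n; insnp++)
    printf ("%d ", *insnp);     /* prints: -2 -4 1 3 5 6 */
  printf ("\n");
  return 0;
}
#endif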
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		     int *pn_ready, int clock_var)
{
  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx insn ATTRIBUTE_UNUSED,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      if (group_barrier_needed_p (insn))
	abort ();
      if (GET_CODE (insn) == CALL_INSN)
	init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
{
  if (insn == NULL_RTX || !INSN_P (insn))
    abort ();
  return (!reload_completed
	  || !safe_group_barrier_needed_p (insn));
}
/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx dfa_pre_cycle_insn;

/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
		    int clock, int *sort_p)
{
  int setup_clocks_p = FALSE;

  if (insn == NULL_RTX || !INSN_P (insn))
    abort ();
  if ((reload_completed && safe_group_barrier_needed_p (insn))
      || (last_scheduled_insn
	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
    {
      init_insn_group_barriers ();
      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");
      stop_before_p = 1;
      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
	  else
	    *sort_p = 0;
	  return 1;
	}
      else if (reload_completed)
	setup_clocks_p = TRUE;
      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
	state_reset (curr_state);
      else
	{
	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
	  state_transition (curr_state, dfa_stop_insn);
	  state_transition (curr_state, dfa_pre_cycle_insn);
	  state_transition (curr_state, NULL);
	}
    }
  else if (reload_completed)
    setup_clocks_p = TRUE;
  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
      && asm_noperands (PATTERN (insn)) < 0)
    {
      enum attr_itanium_class c = ia64_safe_itanium_class (insn);

      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
	{
	  rtx link;
	  int d = -1;

	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == 0)
	      {
		enum attr_itanium_class dep_class;
		rtx dep_insn = XEXP (link, 0);

		dep_class = ia64_safe_itanium_class (dep_insn);
		if ((dep_class == ITANIUM_CLASS_MMMUL
		     || dep_class == ITANIUM_CLASS_MMSHF)
		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
		    && (d < 0
			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
		  d = last_clock - clocks [INSN_UID (dep_insn)];
	      }
	  if (d >= 0)
	    add_cycles [INSN_UID (insn)] = 3 - d;
	}
    }
  return 0;
}

/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
		   insn)  */
  int cost;     /* cost of the state in cycles */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following array maps an insn number to the corresponding bundle
   state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;

/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = xmalloc (sizeof (struct bundle_state));
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}
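/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the two functions above implement a simple free-list allocator -- freed
   states are chained for reuse, and every fresh allocation is also linked
   onto a second chain so that everything can be released at once later.
   The standalone program below shows the same scheme with an invented
   node type.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct node
{
  struct node *next;                 /* free-list link */
  struct node *allocated_chain;      /* link of everything ever allocated */
  int payload;
};

static struct node *free_chain;
static struct node *allocated_chain;

static struct node *
get_node (void)
{
  struct node *result;

  if (free_chain != NULL)
    {
      result = free_chain;           /* reuse a previously freed node */
      free_chain = result->next;
    }
  else
    {
      result = malloc (sizeof (struct node));
      result->allocated_chain = allocated_chain;
      allocated_chain = result;
    }
  return result;
}

static void
put_node (struct node *n)
{
  n->next = free_chain;              /* push back onto the free list */
  free_chain = n;
}

static void
release_all (void)
{
  struct node *curr, *next;

  for (curr = allocated_chain; curr != NULL; curr = next)
    {
      next = curr->allocated_chain;
      free (curr);
    }
  allocated_chain = free_chain = NULL;
}

int
main (void)
{
  struct node *a = get_node ();
  put_node (a);
  printf ("%d\n", get_node () == a);   /* prints 1: the node was recycled */
  release_all ();
  return 0;
}
#endif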
/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;

/* The function returns hash of BUNDLE_STATE.  */

static unsigned
bundle_state_hash (const void *bundle_state)
{
  const struct bundle_state *state = (struct bundle_state *) bundle_state;
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

static int
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
{
  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}
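/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the hash callback above folds the raw DFA state bytes together with the
   insn number, shifting each byte by a position-dependent amount.  The
   standalone program below applies the same folding to an opaque buffer;
   the buffer contents and size are invented for illustration.  */
#if 0
#include <stdio.h>
#include <limits.h>

static unsigned
fold_bytes (const unsigned char *buf, size_t size, int insn_num)
{
  unsigned result = 0;
  size_t i;

  /* Shift each byte by a position-dependent amount so that permuted
     buffers hash differently, then mix in the insn number.  */
  for (i = 0; i < size; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + insn_num;
}

int
main (void)
{
  unsigned char a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  unsigned char b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };

  /* Two permutations of the same bytes produce different hashes.  */
  printf ("%u %u\n", fold_bytes (a, sizeof a, 3), fold_bytes (b, sizeof b, 3));
  return 0;
}
#endif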
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || (((struct bundle_state *)
			*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((struct bundle_state *)
			   *entry_ptr)->branch_deviation
		       > bundle_state->branch_deviation))))
    {
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
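/* Illustrative sketch (not part of the ia64 back end, and not compiled):
   the comparison chain above keeps, for each key, the state with the
   smallest (cost, accumulated_insns_num, branch_deviation) triple in
   lexicographic order.  The standalone program below expresses that same
   ordering on plain structs with invented field values.  */
#if 0
#include <stdio.h>

struct toy_state { int cost, insns, deviation; };

/* Return nonzero if A is strictly better than B in the lexicographic
   sense used by insert_bundle_state.  */
static int
better_p (const struct toy_state *a, const struct toy_state *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->insns != b->insns)
    return a->insns < b->insns;
  return a->deviation < b->deviation;
}

int
main (void)
{
  struct toy_state x = { 4, 9, 1 };
  struct toy_state y = { 4, 8, 2 };
  printf ("%d\n", better_p (&y, &x));   /* 1: same cost, fewer insns */
  return 0;
}
#endif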
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
				    (htab_del) 0);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  htab_delete (bundle_state_table);
}

/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
   If it was successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  if (insn == NULL_RTX)
    abort ();
  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      if (GET_MODE (insn) == TImode)
	abort ();
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	abort ();
      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}
/* The following function returns position in the two window bundle
   for given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}

/* The function returns code of a possible template for given position
   and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	abort ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	abort ();
    default:
      abort ();
    }
}
6231 /* The following function returns an insn important for insn bundling
6232 followed by INSN and before TAIL. */
6235 get_next_important_insn (rtx insn
, rtx tail
)
6237 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
6239 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6240 && GET_CODE (PATTERN (insn
)) != USE
6241 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton makes it possible
   to follow all feasible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about inserting
   nops and implicitly about previous decisions) is described by the
   structure bundle_state (see above).  If we generate the same bundle
   state (the key is the automaton state after issuing the insns and
   nops for it), we reuse the already generated one.  As a consequence
   we reject some decisions which cannot improve the solution and
   reduce memory used by the algorithm.

   When we reach the end of an EBB (extended basic block), we choose the
   best sequence and then, moving back through the EBB, insert templates
   for the best alternative.  The templates are taken by querying the
   automaton state for each insn in the chosen bundle states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  There is an additional forward pass through the EBB for the
   Itanium1 processor.  This pass inserts more nops to make the dependency
   between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
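/* Illustrative sketch only (not part of the port): the dynamic programming
   scheme described above, reduced to a toy.  Here a partial decision is
   keyed by (insn index, slot position mod 3) and only the cheapest cost per
   key is kept -- the same "reuse an already generated bundle state" idea,
   with the real key (the DFA state) replaced by a trivial one and the cost
   reduced to a nop count.  All toy_* names are hypothetical.  */

static int
toy_bundle_cost (int insn_num)
{
#define TOY_MAX_INSNS 64
  int best[TOY_MAX_INSNS + 1][3];
  const int inf = 1 << 29;
  int i, pos, nops, result;

  if (insn_num < 0 || insn_num > TOY_MAX_INSNS)
    return 0;

  /* best[i][pos] = minimal number of nops needed to issue the first I
     insns and end up at slot position POS within a bundle.  */
  for (i = 0; i <= insn_num; i++)
    for (pos = 0; pos < 3; pos++)
      best[i][pos] = inf;
  best[0][0] = 0;

  for (i = 0; i < insn_num; i++)
    for (pos = 0; pos < 3; pos++)
      if (best[i][pos] != inf)
        for (nops = 0; nops <= 2; nops++)
          {
            /* Issuing NOPS nops and then one insn advances the slot.  */
            int new_pos = (pos + nops + 1) % 3;
            int cost = best[i][pos] + nops;

            if (cost < best[i + 1][new_pos])
              best[i + 1][new_pos] = cost;   /* merge equivalent states */
          }

  /* Prefer sequences whose last bundle is completely filled.  */
  result = best[insn_num][0];
  for (pos = 1; pos < 3; pos++)
    if (best[insn_num][pos] != inf
        && best[insn_num][pos] + 3 - pos < result)
      result = best[insn_num][pos] + 3 - pos;
  return result;
}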
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx insn, next_insn;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  enum attr_type type;

  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = xmalloc ((insn_num + 2)
                                    * sizeof (struct bundle_state *));
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;

  /* Shift cycle mark if it is put on insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn = NEXT_INSN (insn))
      && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
          || GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
      && GET_MODE (insn) == TImode)
  PUT_MODE (insn, VOIDmode);
  for (next_insn = NEXT_INSN (insn);
       next_insn = NEXT_INSN (next_insn))
    if (INSN_P (next_insn)
        && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
        && GET_CODE (PATTERN (next_insn)) != USE
        && GET_CODE (PATTERN (next_insn)) != CLOBBER)
  PUT_MODE (next_insn, TImode);
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
      || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
  type = ia64_safe_type (insn);
  next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
  index_to_bundle_states [insn_num] = NULL;
  for (curr_state = index_to_bundle_states [insn_num - 1];
       curr_state = next_state)
  pos = curr_state->accumulated_insns_num % 3;
  next_state = curr_state->next;
  /* We must fill up the current bundle in order to start a
     subsequent asm insn in a new bundle.  Asm insn is always
     placed in a separate bundle.  */
    = (next_insn != NULL_RTX
       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
  /* We may fill up the current bundle if it is the cycle end
     without a group barrier.  */
    = (only_bundle_end_p || next_insn == NULL_RTX
       || (GET_MODE (next_insn) == TImode
           && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
      /* We need to insert 2 nops for cases like M_MII.  To
         guarantee issuing all insns on the same cycle for
         Itanium 1, we need to issue 2 nops after the first M
         insn (MnnMII where n is a nop insn).  */
      || ((type == TYPE_M || type == TYPE_A)
          && ia64_tune == PROCESSOR_ITANIUM
          && !bundle_end_p && pos == 1))
    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
  if (index_to_bundle_states [insn_num] == NULL)
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state = curr_state->next)
    if (verbose >= 2 && dump)
      /* This structure is taken from generated code of the
         pipeline hazard recognizer (see file insn-attrtab.c).
         Please don't forget to change the structure if a new
         automaton is added to .md file.  */
      unsigned short one_automaton_state;
      unsigned short oneb_automaton_state;
      unsigned short two_automaton_state;
      unsigned short twob_automaton_state;
      "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
      curr_state->unique_num,
      (curr_state->originator == NULL
       ? -1 : curr_state->originator->unique_num),
      curr_state->before_nops_num, curr_state->after_nops_num,
      curr_state->accumulated_insns_num, curr_state->branch_deviation,
      (ia64_tune == PROCESSOR_ITANIUM
       ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
       : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
  if (index_to_bundle_states [insn_num] == NULL)
  /* We should find a solution because the 2nd insn scheduling has
  /* Find a state corresponding to the best insn sequence.  */
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state = curr_state->next)
    /* We are only looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns placed
       in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
        && (best_state == NULL || best_state->cost > curr_state->cost
            || (best_state->cost == curr_state->cost
                && (curr_state->accumulated_insns_num
                    < best_state->accumulated_insns_num
                    || (curr_state->accumulated_insns_num
                        == best_state->accumulated_insns_num
                        && curr_state->branch_deviation
                           < best_state->branch_deviation)))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
  insn = curr_state->insn;
  asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
           || asm_noperands (PATTERN (insn)) >= 0);
  if (verbose >= 2 && dump)
    unsigned short one_automaton_state;
    unsigned short oneb_automaton_state;
    unsigned short two_automaton_state;
    unsigned short twob_automaton_state;
    "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
    curr_state->unique_num,
    (curr_state->originator == NULL
     ? -1 : curr_state->originator->unique_num),
    curr_state->before_nops_num, curr_state->after_nops_num,
    curr_state->accumulated_insns_num, curr_state->branch_deviation,
    (ia64_tune == PROCESSOR_ITANIUM
     ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
     : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
  /* Find the position in the current bundle window.  The window can
     contain at most two bundles.  A two-bundle window means that
     the processor will make two bundle rotations.  */
  max_pos = get_max_pos (curr_state->dfa_state);
  /* The following (negative template number) means that the
     processor did one bundle rotation.  */
      || (max_pos == 3 && template0 < 0))
  /* We are at the end of the window -- find template(s) for
  template0 = get_template (curr_state->dfa_state, 3);
  template1 = get_template (curr_state->dfa_state, 3);
  template0 = get_template (curr_state->dfa_state, 6);
  if (max_pos > 3 && template1 < 0)
    /* It may happen when we have the stop inside a bundle.  */
    template1 = get_template (curr_state->dfa_state, 3);
  /* Emit nops after the current insn.  */
  for (i = 0; i < curr_state->after_nops_num; i++)
    emit_insn_after (nop, insn);
  /* We are at the start of a bundle: emit the template
     (it should be defined).  */
  b = gen_bundle_selector (GEN_INT (template0));
  ia64_emit_insn_before (b, nop);
  /* If we have a two-bundle window, we make one bundle
     rotation.  Otherwise template0 will be undefined
     (negative value).  */
  template0 = template1;
  /* Move the position backward in the window.  Group barrier has
     no slot.  Asm insn takes all bundle.  */
  if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
      && asm_noperands (PATTERN (insn)) < 0)
  /* Long insn takes 2 slots.  */
  if (ia64_safe_type (insn) == TYPE_L)
      && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
      && GET_CODE (PATTERN (insn)) != ASM_INPUT
      && asm_noperands (PATTERN (insn)) < 0)
  /* The current insn is at the bundle start: emit the
  b = gen_bundle_selector (GEN_INT (template0));
  ia64_emit_insn_before (b, insn);
  b = PREV_INSN (insn);
  /* See comment above in analogous place for emitting nops
  template0 = template1;
  /* Emit nops after the current insn.  */
  for (i = 0; i < curr_state->before_nops_num; i++)
    ia64_emit_insn_before (nop, insn);
  nop = PREV_INSN (insn);
  /* See comment above in analogous place for emitting nops
  b = gen_bundle_selector (GEN_INT (template0));
  ia64_emit_insn_before (b, insn);
  b = PREV_INSN (insn);
  template0 = template1;
  if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium1 has a strange design: if the distance between an insn
       and a dependent MM-insn is less than 4 cycles, we get an
       additional 6-cycle stall.  So we make the distance equal to 4
       cycles if it
    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
        || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
        || GET_CODE (PATTERN (insn)) == USE
        || GET_CODE (PATTERN (insn)) == CLOBBER)
    next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
    if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
      /* We found a MM-insn which needs additional cycles.  */
      /* Now we are searching for the template of the bundle in
         which the MM-insn is placed and the position of the
         insn in the bundle (0, 1, 2).  We also check whether
         there is a stop before the insn.  */
      last = prev_active_insn (insn);
      pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
        last = prev_active_insn (last);
      for (;; last = prev_active_insn (last))
        if (recog_memoized (last) == CODE_FOR_bundle_selector)
          template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
            /* The insn is in MLX bundle.  Change the template
               onto MFI because we will add nops before the
               insn.  It simplifies subsequent code a lot.  */
              = gen_bundle_selector (const2_rtx); /* -> MFI */
        else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
                 && (ia64_safe_itanium_class (last)
                     != ITANIUM_CLASS_IGNORE))
      /* Some check of correctness: the stop is not at the
         bundle start, there are no more than 3 insns in the bundle,
         and the MM-insn is not at the start of a bundle with
      if ((pred_stop_p && n == 0) || n > 2
          || (template0 == 9 && n != 0))
      /* Put nops after the insn in the bundle.  */
      for (j = 3 - n; j > 0; j--)
        ia64_emit_insn_before (gen_nop (), insn);
      /* It takes into account that we will add more N nops
         before the insn later -- please see code below.  */
      add_cycles [INSN_UID (insn)]--;
      if (!pred_stop_p || add_cycles [INSN_UID (insn)])
        ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
      add_cycles [INSN_UID (insn)]--;
      for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
          /* Insert "MII;" template.  */
          ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
          ia64_emit_insn_before (gen_nop (), insn);
          ia64_emit_insn_before (gen_nop (), insn);
          /* To decrease code size, we use "MI;I;"
          ia64_emit_insn_before
            (gen_insn_group_barrier (GEN_INT (3)), insn);
          ia64_emit_insn_before (gen_nop (), insn);
          ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
      /* Put the MM-insn in the same slot of a bundle with the
         same template as the original one.  */
      ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
      /* To put the insn in the same slot, add necessary number
      for (j = n; j > 0; j--)
        ia64_emit_insn_before (gen_nop (), insn);
      /* Put the stop if the original bundle had it.  */
        ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
  free (index_to_bundle_states);
  finish_bundle_state_table ();
  dfa_clean_insn_cache ();
/* The following function is called at the end of scheduling BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

ia64_sched_finish (FILE *dump, int sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
  if (reload_completed)
      final_emit_insn_group_barriers (dump);
      bundling (dump, sched_verbose, current_sched_info->prev_head,
                current_sched_info->next_tail);
      if (sched_verbose && dump)
        fprintf (dump, "// finishing %d-%d\n",
                 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
                 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
/* The following function inserts stop bits in scheduled BB or EBB.  */

final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
  int need_barrier_p = 0;
  rtx prev_insn = NULL_RTX;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == BARRIER)
      rtx last = prev_active_insn (insn);

      if (GET_CODE (last) == JUMP_INSN
          && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
        last = prev_active_insn (last);
      if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
        emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

      init_insn_group_barriers ();
      prev_insn = NULL_RTX;
    else if (INSN_P (insn))
      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
          init_insn_group_barriers ();
          prev_insn = NULL_RTX;
      else if (need_barrier_p || group_barrier_needed_p (insn))
          if (TARGET_EARLY_STOP_BITS)
               last != current_sched_info->prev_head;
               last = PREV_INSN (last))
            if (INSN_P (last) && GET_MODE (last) == TImode
                && stops_p [INSN_UID (last)])
          if (last == current_sched_info->prev_head)
          last = prev_active_insn (last);
              && recog_memoized (last) != CODE_FOR_insn_group_barrier)
            emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
          init_insn_group_barriers ();
          for (last = NEXT_INSN (last);
               last = NEXT_INSN (last))
              group_barrier_needed_p (last);
          emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
          init_insn_group_barriers ();
          group_barrier_needed_p (insn);
          prev_insn = NULL_RTX;
      else if (recog_memoized (insn) >= 0)
      need_barrier_p = (GET_CODE (insn) == CALL_INSN
                        || GET_CODE (PATTERN (insn)) == ASM_INPUT
                        || asm_noperands (PATTERN (insn)) >= 0);
/* If the following function returns TRUE, we will use the DFA

ia64_first_cycle_multipass_dfa_lookahead (void)
  return (reload_completed ? 6 : 4);

/* The following function initiates variable `dfa_pre_cycle_insn'.  */

ia64_init_dfa_pre_cycle_insn (void)
  if (temp_dfa_state == NULL)
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

ia64_dfa_pre_cycle_insn (void)
  return dfa_pre_cycle_insn;
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type st or stf).  */

ia64_st_address_bypass_p (rtx producer, rtx consumer)
  if (producer == NULL_RTX || consumer == NULL_RTX)
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  dest = ia64_single_set (consumer);
  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
      || GET_CODE (mem) != MEM)
  return reg_mentioned_p (reg, mem);

/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type ld or fld).  */

ia64_ld_address_bypass_p (rtx producer, rtx consumer)
  rtx dest, src, reg, mem;

  if (producer == NULL_RTX || consumer == NULL_RTX)
  dest = ia64_single_set (producer);
  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  src = ia64_single_set (consumer);
  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Note that LO_SUM is used for GOT loads.  */
  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
  return reg_mentioned_p (reg, mem);

/* The following function returns TRUE if INSN produces address for a
   load/store insn.  We will place such insns into M slot because it
   decreases its latency time.  */

ia64_produce_address_p (rtx insn)
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

emit_predicate_relation_info (void)
  FOR_EACH_BB_REVERSE (bb)
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
      if (GET_CODE (NEXT_INSN (head)) == NOTE
          && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
        head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (bb->global_live_at_start, r))
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == BB_END (bb))

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
  FOR_EACH_BB_REVERSE (bb)
      rtx insn = BB_HEAD (bb);

      if (GET_CODE (insn) == CALL_INSN
          && GET_CODE (PATTERN (insn)) == COND_EXEC
          && find_reg_note (insn, REG_NORETURN, NULL_RTX))
          rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
          rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
          if (BB_HEAD (bb) == insn)
          if (BB_END (bb) == insn)
      if (insn == BB_END (bb))
      insn = NEXT_INSN (insn);
/* Perform machine dependent operations on the rtl chain INSNS.  */

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
    split_all_insns (0);

  /* ??? update_life_info_in_dirty_blocks fails to terminate during
     non-optimizing bootstrap.  */
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = xcalloc (1, clocks_length);
      if (ia64_tune == PROCESSOR_ITANIUM)
          clocks = xcalloc (clocks_length, sizeof (int));
          add_cycles = xcalloc (clocks_length, sizeof (int));
      if (ia64_tune == PROCESSOR_ITANIUM2)
          pos_1 = get_cpu_unit_code ("2_1");
          pos_2 = get_cpu_unit_code ("2_2");
          pos_3 = get_cpu_unit_code ("2_3");
          pos_4 = get_cpu_unit_code ("2_4");
          pos_5 = get_cpu_unit_code ("2_5");
          pos_6 = get_cpu_unit_code ("2_6");
          _0mii_ = get_cpu_unit_code ("2b_0mii.");
          _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
          _0mib_ = get_cpu_unit_code ("2b_0mib.");
          _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
          _1mii_ = get_cpu_unit_code ("2b_1mii.");
          _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
          _1mib_ = get_cpu_unit_code ("2b_1mib.");
          _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
          pos_1 = get_cpu_unit_code ("1_1");
          pos_2 = get_cpu_unit_code ("1_2");
          pos_3 = get_cpu_unit_code ("1_3");
          pos_4 = get_cpu_unit_code ("1_4");
          pos_5 = get_cpu_unit_code ("1_5");
          pos_6 = get_cpu_unit_code ("1_6");
          _0mii_ = get_cpu_unit_code ("1b_0mii.");
          _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
          _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
          _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
          _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
          _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
          _0mib_ = get_cpu_unit_code ("1b_0mib.");
          _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
          _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
          _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
          _1mii_ = get_cpu_unit_code ("1b_1mii.");
          _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
          _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
          _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
          _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
          _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
          _1mib_ = get_cpu_unit_code ("1b_1mib.");
          _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
          _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
          _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
      schedule_ebbs (dump_file);
      finish_bundle_states ();
      if (ia64_tune == PROCESSOR_ITANIUM)
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    emit_all_insn_group_barriers (dump_file);
  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      /* Skip over insns that expand to nothing.  */
      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
          if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
              && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
          insn = prev_active_insn (insn);
      if (GET_CODE (insn) == CALL_INSN)
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));
          emit_insn (gen_break_f ());
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
/* Return true if REGNO is used by the epilogue.  */

ia64_epilogue_uses (int regno)
      /* With a call to a function in another module, we will write a new
         value to "gp".  After returning from such a call, we need to make
         sure the function restores the original gp-value, even if the
         function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
                               TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */

      /* Likewise for ar.pfs, which is used by br.ret.  */

/* Return true if REGNO is used by the frame unwinder.  */

ia64_eh_uses (int regno)
  if (! reload_completed)
  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
/* Return true if this goes in small data/bss.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

ia64_in_small_data_p (tree exp)
  if (TARGET_NO_SDATA)

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

process_epilogue (void)
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;

  fprintf (asm_out_file, "\t.restore sp\n");
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

process_set (FILE *asm_out_file, rtx pat)
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
         shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
               ia64_dbx_register_number (dest_regno));

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
      if (GET_CODE (src) == PLUS)
          rtx op0 = XEXP (src, 0);
          rtx op1 = XEXP (src, 1);
          if (op0 == dest && GET_CODE (op1) == CONST_INT)
              if (INTVAL (op1) < 0)
                fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
                process_epilogue ();
      else if (GET_CODE (src) == REG
               && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
        process_epilogue ();

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

          /* Saving return address pointer.  */
          if (dest_regno != current_frame_info.reg_save_b0)
          fprintf (asm_out_file, "\t.save rp, r%d\n",
                   ia64_dbx_register_number (dest_regno));

          if (dest_regno != current_frame_info.reg_save_pr)
          fprintf (asm_out_file, "\t.save pr, r%d\n",
                   ia64_dbx_register_number (dest_regno));

        case AR_UNAT_REGNUM:
          if (dest_regno != current_frame_info.reg_save_ar_unat)
          fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
                   ia64_dbx_register_number (dest_regno));

          if (dest_regno != current_frame_info.reg_save_ar_lc)
          fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
                   ia64_dbx_register_number (dest_regno));

        case STACK_POINTER_REGNUM:
          if (dest_regno != HARD_FRAME_POINTER_REGNUM
              || ! frame_pointer_needed)
          fprintf (asm_out_file, "\t.vframe r%d\n",
                   ia64_dbx_register_number (dest_regno));

          /* Everything else should indicate being stored to memory.  */

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
      if (GET_CODE (XEXP (dest, 0)) == REG)
          base = XEXP (dest, 0);
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
          base = XEXP (XEXP (dest, 0), 0);
          off = INTVAL (XEXP (XEXP (dest, 0), 1));
      if (base == hard_frame_pointer_rtx)
          saveop = ".savepsp";
      else if (base == stack_pointer_rtx)

      src_regno = REGNO (src);
          if (current_frame_info.reg_save_b0 != 0)
          fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);

          if (current_frame_info.reg_save_pr != 0)
          fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);

          if (current_frame_info.reg_save_ar_lc != 0)
          fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);

          if (current_frame_info.reg_save_ar_pfs != 0)
          fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);

        case AR_UNAT_REGNUM:
          if (current_frame_info.reg_save_ar_unat != 0)
          fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);

          fprintf (asm_out_file, "\t.save.g 0x%x\n",
                   1 << (src_regno - GR_REG (4)));

          fprintf (asm_out_file, "\t.save.b 0x%x\n",
                   1 << (src_regno - BR_REG (1)));

          fprintf (asm_out_file, "\t.save.f 0x%x\n",
                   1 << (src_regno - FR_REG (2)));

        case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
        case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
        case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
        case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
          fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
                   1 << (src_regno - FR_REG (12)));
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

process_for_unwind_directive (FILE *asm_out_file, rtx insn)
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
          last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

          /* Restore unwind state from immediately before the epilogue.  */
          if (need_copy_state)
              fprintf (asm_out_file, "\t.body\n");
              fprintf (asm_out_file, "\t.copy_state 1\n");
              need_copy_state = false;

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
        pat = XEXP (pat, 0);
        pat = PATTERN (insn);

      switch (GET_CODE (pat))
          process_set (asm_out_file, pat);

            int limit = XVECLEN (pat, 0);
            for (par_index = 0; par_index < limit; par_index++)
                rtx x = XVECEXP (pat, 0, par_index);
                if (GET_CODE (x) == SET)
                  process_set (asm_out_file, x);
ia64_init_builtins (void)
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                long_integer_type_node, NULL_TREE);
  /* __sync_bool_compare_and_swap_di */
  tree si_ftype_pdi_di_di
    = build_function_type_list (integer_type_node,
                                pdi_type_node, long_integer_type_node,
                                long_integer_type_node, NULL_TREE);
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type_list (integer_type_node,
                                psi_type_node, integer_type_node, NULL_TREE);

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type_list (long_integer_type_node,
                                pdi_type_node, long_integer_type_node,

  /* __sync_lock_release_si */
    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);

  /* __sync_lock_release_di */
    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  /* ??? The back end should know to load/save __fpreg variables using
     the ldf.fill and stf.spill instructions.  */
  TYPE_PRECISION (fpreg_type) = 80;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
      tree float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,

#define def_builtin(name, type, code) \
  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);
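/* For illustration only (hypothetical user code, not something this file
   itself compiles): once registered above, these machine builtins are
   callable by name from code compiled for ia64 and return the value the
   memory word held before the operation, e.g.

     int
     counter_bump (int *counter)
     {
       return __sync_fetch_and_add_si (counter, 1);
     }
*/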
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
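/* For illustration only: the retry loop sketched above, written as plain C
   around a hypothetical primitive toy_cas (PTR, OLD, NEW), which atomically
   stores NEW into *PTR if *PTR == OLD and always returns the previous
   contents of *PTR.  The real expander below emits cmpxchg.acq (or
   fetchadd) insns instead; none of the toy_* names exist in GCC.  */

static int toy_cas (int *ptr, int oldval, int newval);

static int
toy_fetch_and_add (int *ptr, int value)
{
  int ret, tmp = *ptr;

  do
    {
      ret = tmp;                               /* value seen before the add */
      tmp = toy_cas (ptr, ret, ret + value);   /* try to publish ret + value */
    }
  while (tmp != ret);                          /* lost the race -- retry */

  return ret;
}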
ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
  rtx ret, label, tmp, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
        insn = gen_fetchadd_acq_si (ret, mem, value);
        insn = gen_fetchadd_acq_di (ret, mem, value);

  tmp = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_move_insn (ret, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
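/* For illustration only, matching the loop above in plain C; toy_cas () is
   the same hypothetical compare-and-swap primitive declared before
   ia64_expand_fetch_and_op.  The only difference from toy_fetch_and_add is
   which value is returned (the new one rather than the old one).  */

static int
toy_add_and_fetch (int *ptr, int value)
{
  int old, ret, tmp = *ptr;

  do
    {
      old = tmp;
      ret = old + value;             /* the new value, which we return */
      tmp = toy_cas (ptr, old, ret);
    }
  while (tmp != old);

  return ret;
}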
ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
  rtx old, label, tmp, ret, ccv, insn, mem, value;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_move_insn (old, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     cmpxchgsz.acq ret = [ptr], newval, ar.ccv

   For bool_ it's the same except return ret == oldval.
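/* For illustration only: the intended semantics in plain C, again in terms
   of the hypothetical toy_cas () primitive used in the sketches above.  */

static int
toy_val_compare_and_swap (int *ptr, int oldval, int newval)
{
  return toy_cas (ptr, oldval, newval);            /* previous value of *ptr */
}

static int
toy_bool_compare_and_swap (int *ptr, int oldval, int newval)
{
  return toy_cas (ptr, oldval, newval) == oldval;  /* nonzero if the swap hit */
}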
ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
                              int boolp, tree arglist, rtx target)
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (GET_MODE (old) != mode)
    old = convert_to_mode (mode, old, /*unsignedp=*/1);
  if (GET_MODE (new) != mode)
    new = convert_to_mode (mode, new, /*unsignedp=*/1);

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  convert_move (ccv, old, /*unsignedp=*/1);
  emit_insn (gen_mf ());
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);

  target = gen_reg_rtx (rmode);
  return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
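/* For illustration only: an atomic exchange behaves as sketched below; the
   real expander uses a single xchg insn (gen_xchgsi/gen_xchgdi below)
   rather than a toy_cas () retry loop.  */

static int
toy_lock_test_and_set (int *ptr, int newval)
{
  int old, tmp = *ptr;

  do
    {
      old = tmp;
      tmp = toy_cas (ptr, old, newval);
    }
  while (tmp != old);

  return old;                        /* the value that was replaced */
}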
ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = gen_reg_rtx (mode);

    insn = gen_xchgsi (ret, mem, new);
    insn = gen_xchgdi (ret, mem, new);

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

ia64_expand_lock_release (enum machine_mode mode, tree arglist,
                          rtx target ATTRIBUTE_UNUSED)
  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
/* For HP-UX IA64, aggregate parameters are passed stored in the
   most significant bits of the stack slot.  */

ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);

/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
  struct extern_func_list *next;

static GTY(()) struct extern_func_list *extern_func_head;

ia64_hpux_add_extern_decl (tree decl)
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->next = extern_func_head;
  extern_func_head = p;

/* Print out the list of used global functions.  */

ia64_hpux_file_end (void)
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
          const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

          TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, name);
          fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");

  extern_func_head = 0;
/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it
   for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}
/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
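/* ia64_section_threshold is the small-data size limit (set from the
   -G option); constants no larger than that go into .sdata so they
   can be addressed with a short gp-relative sequence.  */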
/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
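/* In each helper above, the trailing TRUE argument asks the generic
   ELF routines to behave as if flag_pic were set, i.e. to place data
   that needs runtime relocations in a writable section.  */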
/* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that structure should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }
  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }
  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }
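  /* At this point THIS has been adjusted by DELTA and, if requested,
     by the value loaded from *(*this + vcall_offset), as described in
     the comment before this function.  */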
  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}
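/* r8 is the register in which the ia64 calling convention normally
   passes the address of a returned aggregate.  Returning NULL_RTX
   above instead makes the middle end pass that address as an implicit
   first argument (out0), which is what the Itanium C++ ABI requires
   for types with non-trivial copy constructors or destructors.  */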
#include "gt-ia64.h"